Data analysis across treatments at 4 weeks post-challenge, CONU as reference

Author

Filipe Figueiredo

R setup
require("knitr")
opts_knit$set(root.dir = '~/Documents/PhD/Thesis/quantseq_dataAnalysis/deseq2_dataAnalysis_2024/results/results_4wpc')
Code

#| echo: false
p {
  text-align: justify
}

figcaption {
  text-align: center;
}

h1 {
text-align: center;
}

h2 {
text-align: center;
}

h3 {
text-align: center;
}

h4 {
text-align: center;
}

div {
  text-indent: 50px;
}

/* Add space between figures and text */
<!-- figure { -->
<!--     margin-bottom: 60px; /* Adjust the value as needed */ -->
<!--     margin-top: 60px; -->
<!-- } -->
Loading packages, functions, and results tables
suppressPackageStartupMessages({
  library('tidyverse')
  library('VennDiagram')
  library('clusterProfiler')
  library('gprofiler2')
  library('org.Hs.eg.db')
  library('enrichplot')
})

## Loading functions ----
source(
  '~/Documents/PhD/Thesis/quantseq_dataAnalysis/deseq2_dataAnalysis_2024/github_repo/scripts/functions_data-wrangling_march24.R'
)

immune_related_GOterms <-
  read.table(
    '~/Documents/PhD/Thesis/quantseq_dataAnalysis/deseq2_dataAnalysis_2024/github_repo/goTerm_lists/immune_related_GOterms.tsv',
    header = T,
    sep = '\t'
  )  # loading dataframe containing immune related GO terms

## Loading results ----
setwd(
  '~/Documents/PhD/Thesis/quantseq_dataAnalysis/deseq2_dataAnalysis_2024/results/results_4wpc'
)

results_files <-
  list.files(pattern = 'res_.*')  # regex matching results files
list.data <- list()
for (i in 1:length(results_files)) {
  list.data[[i]] <- load(results_files[i])
}

Differentially expressed genes (DEG) summary

To create this document, raw sequencing data was:

  • Demultiplexed

  • Trimmed

  • Mapped to Ssal_v3.1, which is the current reference assembly for Atlantic salmon (released in 2021)

  • Run through a feature count, which assigns sequencing reads to the mapped genes

  • Gene counts were modelled using DESeq2’s negative binomial distribution

  • Results tables were extracted based on the desired contrasts/comparisons, as displayed in the example below:

res_ivld_vs_conu_4wpc <- results(ddsGroup_ensembl, contrast=c("group","ivld.4wpc","conu.4wpc"))
res_ivhd_vs_conu_4wpc <- results(ddsGroup_ensembl, contrast=c("group","ivhd.4wpc","conu.4wpc"))
res_gata3_vs_conu_4wpc <- results(ddsGroup_ensembl, contrast=c("group","gata3.4wpc","conu.4wpc"))
res_eomes_vs_conu_4wpc <- results(ddsGroup_ensembl, contrast=c("group","eomes.4wpc","conu.4wpc"))
res_dnavaccine_vs_conu_4wpc <- results(ddsGroup_ensembl, contrast=c("group","dnavaccine.4wpc","conu.4wpc"))
DEG summary with CONU as reference
## For loop, significant genes ----
results_files <-
  ls(pattern = '^res_.*conu.4wpc')  # listing Global Environment files matching regex pattern, to be used in the for loop. In this case, starting with res, containing conu, and 1wpc

# Create an empty list to store results
all_significant_genes <- list()

# Assuming results_files is a list of object names
for (name in results_files) {
  # Remove the file extension ".RData" if present
  name <- sub(".RData$", "", name)

  # Retrieve the object from the global environment
  results <- get(name, envir = .GlobalEnv)

  # Call the significant_genes function and store the result
  result_genes <- significant_genes(results)

  # Store the result in the list
  all_significant_genes[[name]] <- result_genes
}

### Renaming dataframes ----
names(all_significant_genes) <-
  c('DNA vaccine',
    'EOMES',
    'GATA3',
    'IV-HD',
    'IV-LD')

### Delisting dataframes ----
list2env(all_significant_genes, envir = .GlobalEnv)  # splitting the dataframes to summarize their contents

### Summarizing differentially expressed genes ----
deg_regulation_summary <-
  lapply(mget(c(
    'DNA vaccine',
    'EOMES',
    'GATA3',
    'IV-HD',
    'IV-LD'
  )), sig_genes_metrics)

### Transforming summary into a tibble ----
deg_regulation_summary <-
  as_tibble(bind_rows(deg_regulation_summary, .id = 'Treatment'))

### Reordering factors to plot ----
deg_regulation_summary$Treatment <-
  factor(
    deg_regulation_summary$Treatment,
    levels = c('IV-LD',
               'IV-HD',
               'DNA vaccine',
               'EOMES',
               'GATA3')
  )

results_files <-
  list.files(pattern = '^res_.*_conu_4wpc')  # regex matching results files
list.data <- list()
for (i in 1:length(results_files)) {
  list.data[[i]] <- load(results_files[i])
}

# List of objects containing all results files matching the regex pattern
objects <- ls(pattern = "^res_.*_conu_4wpc")

# List of treatments
treatments <- c("dnavaccine", "eomes", "gata3", "ivhd", "ivld")

# Initialize an empty list to store data frames
result_list <- list()

# Loop through each treatment and apply the function, also adding a column with treatment information
for (treatment in treatments) {
  obj <- paste0("res_", treatment, "_vs_conu_4wpc")
  result_list[[treatment]] <-
    improved_data_wrangling(get(obj), treatment = treatment, sampling_point = "4wpc")
}

rm(i,
   obj,
   objects,
   ora_down,
   ora_up,
   results_files,
   treatment,
   treatments)

# Vertically merge data frames from all treatments and remove NA
merged_df <- do.call(rbind, result_list) %>% na.omit()

# Reduce and intersect the ID column in result_list to find common differentially regulated genes among all treatments
reduced_intersected_ids <-
  Reduce(intersect, lapply(result_list, function(df)
    df$ID))

# Print the reduced and intersected IDs
print(reduced_intersected_ids)

# Find the common IDs between reduced_intersected_ids and merged_df. This way we are keeping only the common regulated salmon genes
common_ids <- intersect(merged_df$ID, reduced_intersected_ids)

# Subset merged_df to keep only rows with common IDs
# merged_subset <- merged_df[merged_df$ID %in% common_ids,]
merged_subset <-
  subset(merged_df, ID %in% common_ids)  # tidy version

merged_df[duplicated(merged_df$ortholog_name),] %>% arrange(ortholog_name) %>% print(n = 100)  # identifying duplicates in merged_df

# Splitting between up and downregulated genes, and sorting by gene name and adjusted p-value
upregulated_gsea <-
  merged_subset %>% dplyr::filter(log2FC > 0) %>% arrange(ortholog_name, adjusted_p.val)

downregulated_gsea <-
  merged_subset %>% dplyr::filter(log2FC < 0) %>% arrange(ortholog_name, adjusted_p.val)


# Keeping just one of the ortholog copies, the one with the lowest adjusted p-value
unique_upregulated <-
  upregulated_gsea[!duplicated(upregulated_gsea$ortholog_ensg),] %>% arrange(ortholog_name)
unique_downregulated <-
  downregulated_gsea[!duplicated(downregulated_gsea$ortholog_ensg),] %>% arrange(ortholog_name)

# Merging down and upregulated dataframes after removing duplicates
enrichment_unique_common <-
  rbind(unique_downregulated, unique_upregulated) %>% dplyr::arrange(desc(log2FC))

# Arranging matrix for GSEA
enrichment_gsea_common <- enrichment_unique_common$log2FC

names(enrichment_gsea_common) <-
  enrichment_unique_common$ortholog_ensg
DEG summary plot with CONU as reference
deg_regulation_summary %>%
  ggplot(aes(x = Treatment, y = n, fill = Regulation)) +
  geom_bar(
    stat = 'identity',
    position = 'dodge',
    colour = 'black',
    linewidth = .2
  ) +
  scale_fill_manual(values = c('#BAFFC9',
                               '#FFDFBA')) +
  ylab('Number of genes') +
  geom_text(
    aes(label = n),
    position = position_dodge(width = 0.9),
    vjust = -0.5,
    size = 4
  ) +
  # ggtitle(label = 'Differentially Expressed Genes',
  #         subtitle = 'Cardiac tissue at 1 WPC, CONU as reference') +
  theme_light() +
  theme(panel.grid = element_blank()) +
  theme(text = element_text(size = 14, family = 'serif')) +
  theme(axis.text.x = element_text(angle = 0, vjust = 0.5)) +
  geom_vline(
    xintercept = c(1.5, 2.5, 3.5, 4.5, 6.5, 7.5, 8.5),
    linetype = 'dotted',
    linewidth = 0.2
  ) +
  geom_hline(yintercept = 0, size = 0.2)

Figure 1: Summary of differentially expressed genes in the heart at 4 WPC with CONU as reference
DEG summary with pTagRFP/IV-LD as reference
### For loop, significant genes ----
results_files <-
  ls(pattern = '^res_.*(ptag|ivld).4wpc')  # listing Global Environment files matching regex pattern, to be used in the for loop. In this case, starting with res, and containing conu or ivld, and 4wpc

# Create an empty list to store results
all_significant_genes <- list()

# Assuming results_files is a list of object names
for (name in results_files) {
  # Remove the file extension ".RData" if present
  name <- sub(".RData$", "", name)
  
  # Retrieve the object from the global environment
  results <- get(name, envir = .GlobalEnv)
  
  # Call the significant_genes function and store the result
  result_genes <- significant_genes(results)
  
  # Store the result in the list
  all_significant_genes[[name]] <- result_genes
}

### Renaming dataframes ----
names(all_significant_genes) <-
  c(
    'DNA vaccine vs IV-LD',
    'DNA vaccine vs pTagRFP',
    'EOMES vs IV-LD',
    'EOMES vs pTagRFP',
    'GATA3 vs IV-LD',
    'GATA3 vs pTagRFP',
    'IV-HD vs IV-LD',
    'IV-HD vs pTagRFP',
    'IV-LD vs pTagRFP'
  )

### Delisting dataframes ----
list2env(all_significant_genes, envir = .GlobalEnv)  # splitting the dataframes to summarize their contents

### Summarizing differentially expressed genes ----
deg_regulation_summary <-
  lapply(mget(
    c(
      'DNA vaccine vs IV-LD',
      'DNA vaccine vs pTagRFP',
      'EOMES vs IV-LD',
      'EOMES vs pTagRFP',
      'GATA3 vs IV-LD',
      'GATA3 vs pTagRFP',
      'IV-HD vs IV-LD',
      'IV-HD vs pTagRFP',
      'IV-LD vs pTagRFP'
    )
  ), sig_genes_metrics)

### Transforming summary into a tibble ----
deg_regulation_summary <-
  as_tibble(bind_rows(deg_regulation_summary, .id = 'Contrast'))

deg_regulation_summary <-
  deg_regulation_summary %>% add_row(Contrast = 'DNA vaccine vs IV-LD',
                                     Regulation = 'downregulated',
                                     n = 0)


### Reordering factors to plot ----
deg_regulation_summary$Contrast <-
  factor(
    deg_regulation_summary$Contrast,
    levels = c(
      'IV-LD vs pTagRFP',
      'IV-HD vs pTagRFP',
      'DNA vaccine vs pTagRFP',
      'EOMES vs pTagRFP',
      'GATA3 vs pTagRFP',
      'IV-HD vs IV-LD',
      'DNA vaccine vs IV-LD',
      'EOMES vs IV-LD',
      'GATA3 vs IV-LD'
    )
  )
DEG summary plot with pTagRFP/IV-LD as reference
deg_regulation_summary %>%
  ggplot(aes(x = Contrast, y = n, fill = Regulation)) +
  geom_bar(
    stat = 'identity',
    position = 'dodge',
    colour = 'black',
    linewidth = .2
  ) +
  scale_fill_brewer(palette = 'Dark2') +
  ylab('Number of genes') +
  geom_text(
    aes(label = n),
    position = position_dodge(width = 0.9),
    vjust = -0.5,
    size = 4
  ) +
  # ggtitle(label = 'Differentially Expressed Genes',
  #         subtitle = 'Cardiac tissue at 4 WPC') +
  theme_light() +
  theme(panel.grid = element_blank()) +
  theme(text = element_text(size = 14, family = 'serif')) +
  theme(axis.text.x = element_text(angle = 270, vjust = 0.5)) +
  geom_vline(
    xintercept = c(1.5, 2.5, 3.5, 4.5, 6.5, 7.5, 8.5),
    linetype = 'dotted',
    linewidth = 0.2
  ) +
  geom_vline(xintercept = 5.5,
             linetype = 'solid',
             linewidth = 0.3) +
  geom_hline(yintercept = 0, size = 0.2) +
  annotate(
    'text',
    x = 3,
    y = 550,
    label = 'vs pTagRFP',
    size = 4,
    family = 'serif'
  ) +
  annotate(
    'text',
    x = 7.5,
    y = 550,
    label = 'vs IV-LD',
    size = 4,
    family = 'serif'
  )

Figure 2: Summary of differentially expressed genes in the heart at 4 WPC with pTagRFP or IV-LD as reference



Extracting signiticantly regulated genes
# Get names of all objects in global environment with a regex pattern matching 'res_.*_conu_4wpc'
res_objects <- ls(pattern = "res_.*_conu_4wpc")

# Apply significant_genes function to each res object
for (res_object in res_objects) {
  # Get the object from the global environment
  result <- get(res_object)
  
  # Apply significant_genes function
  significant_result <- significant_genes(result)
  
  # Assign the result back to the global environment
  assign(paste0("significant_", res_object), significant_result, envir = .GlobalEnv)
}

rm(list.data, result, i, res_object, res_objects, results_files, significant_result)


Venn diagrams and ortholog over-representation analysis


The results tables used in these Venn diagrams and ORA were extracted using CONU as reference.

Downregulated

Significantly downregulated genes
# create list of downregulated geneIDs
b <- list(
  A = significant_res_dnavaccine_vs_conu_4wpc[significant_res_dnavaccine_vs_conu_4wpc$log2FC < 0, ]$ID,
  B = significant_res_eomes_vs_conu_4wpc[significant_res_eomes_vs_conu_4wpc$log2FC < 0, ]$ID,
  C = significant_res_gata3_vs_conu_4wpc[significant_res_gata3_vs_conu_4wpc$log2FC < 0, ]$ID,
  D = significant_res_ivhd_vs_conu_4wpc[significant_res_ivhd_vs_conu_4wpc$log2FC < 0, ]$ID,
  E = significant_res_ivld_vs_conu_4wpc[significant_res_ivld_vs_conu_4wpc$log2FC < 0, ]$ID
)

# add treatment names
names(b) <-
  c('DNA vaccine', 'EOMES', 'GATA3', 'IVHD', 'IVLD')

# check gene counts per treatment
kable((sapply(b, length)), col.names = c('count'))
count
DNA vaccine 858
EOMES 1353
GATA3 1429
IVHD 608
IVLD 250


Downregulated Venn diagram
venn1 <- display_venn(
  b,
  fill = c('#cdb4db', '#bde0fe', '#ccd5ae', '#d4a373', '#f08080'),
  lwd = 1,
  cex = 1,
  cat.cex = 1,
  cat.fontfamily = 'serif',
  # cat.fontface = 'bold',
  cat.default.pos = 'outer',
  cat.dist = c(0.20, 0.20, 0.22, 0.20,.20),
  cat.pos = c(360, 360, 250, 90, 360)
)

Figure 3: Downregulated genes, per treatment, at 4 WPC. CONU as reference


From the Venn diagram, I gathered the genes that are common to all treatments, and converted them to h. sapiens orthologs. They are listed in Table 1.

Common downregulated genes
common_genes_downregulated_4wpc <-
  Reduce(
    intersect,
    list(
      significant_res_dnavaccine_vs_conu_4wpc[significant_res_dnavaccine_vs_conu_4wpc$log2FC < 0,]$ID,
      significant_res_gata3_vs_conu_4wpc[significant_res_gata3_vs_conu_4wpc$log2FC < 0,]$ID,
      significant_res_eomes_vs_conu_4wpc[significant_res_eomes_vs_conu_4wpc$log2FC < 0,]$ID,
      significant_res_ivhd_vs_conu_4wpc[significant_res_ivhd_vs_conu_4wpc$log2FC  < 0,]$ID,
      significant_res_ivld_vs_conu_4wpc[significant_res_ivld_vs_conu_4wpc$log2FC < 0,]$ID
    )
  )

common_downregulated_orthologs_4wpc <- gorth(
  query = common_genes_downregulated_4wpc,
  source_organism = 'ssalar',
  target_organism = 'hsapiens',
  mthreshold = 1,
  filter_na = F
)

common_downregulated_orthologs_tbl_4wpc <-
  as_tibble(common_downregulated_orthologs_4wpc) %>% dplyr::select(.,
                                                                   input,
                                                                   ortholog_name,
                                                                   ortholog_ensg,
                                                                   description) %>% dplyr::rename(
                                                                     .,
                                                                     ssalar_ensembl = input,
                                                                     hsapiens_ortholog = ortholog_name,
                                                                     hsapiens_ensembl = ortholog_ensg,
                                                                     description = description
                                                                   )

common_downregulated_orthologs_tbl_4wpc %>% head(., n = 20) %>%
  kable(
    booktabs = TRUE,
    col.names = c(
      'Salmon ENSEMBL',
      'Human ortholog',
      'Human ENSEMBL',
      'Description'
    ),
    align = 'c') %>%
  kableExtra::row_spec(., row = 0, italic = TRUE) %>%
  kableExtra::kable_styling(font_size = 12)
Table 1: Common downregulated orthologs at 4WPC
Salmon ENSEMBL Human ortholog Human ENSEMBL Description
ENSSSAG00000056136 COPB1 ENSG00000129083 COPI coat complex subunit beta 1 [Source:HGNC Symbol;Acc:HGNC:2231]
ENSSSAG00000008058 CHMP1B ENSG00000255112 charged multivesicular body protein 1B [Source:HGNC Symbol;Acc:HGNC:24287]
ENSSSAG00000042725 ACTR1A ENSG00000138107 actin related protein 1A [Source:HGNC Symbol;Acc:HGNC:167]
ENSSSAG00000063331 TLN1 ENSG00000137076 talin 1 [Source:HGNC Symbol;Acc:HGNC:11845]
ENSSSAG00000104939 ATP6AP1 ENSG00000071553 ATPase H+ transporting accessory protein 1 [Source:HGNC Symbol;Acc:HGNC:868]
ENSSSAG00000078656 PDCD6 ENSG00000249915 programmed cell death 6 [Source:HGNC Symbol;Acc:HGNC:8765]
ENSSSAG00000071607 N/A N/A N/A
ENSSSAG00000082229 ARHGDIA ENSG00000141522 Rho GDP dissociation inhibitor alpha [Source:HGNC Symbol;Acc:HGNC:678]
ENSSSAG00000040232 ITGAV ENSG00000138448 integrin subunit alpha V [Source:HGNC Symbol;Acc:HGNC:6150]
ENSSSAG00000043967 CD63 ENSG00000135404 CD63 molecule [Source:HGNC Symbol;Acc:HGNC:1692]
ENSSSAG00000005722 MLKL ENSG00000168404 mixed lineage kinase domain like pseudokinase [Source:HGNC Symbol;Acc:HGNC:26617]
ENSSSAG00000052063 PDLIM1 ENSG00000107438 PDZ and LIM domain 1 [Source:HGNC Symbol;Acc:HGNC:2067]
ENSSSAG00000046808 P4HB ENSG00000185624 prolyl 4-hydroxylase subunit beta [Source:HGNC Symbol;Acc:HGNC:8548]
ENSSSAG00000043688 TKTL2 ENSG00000151005 transketolase like 2 [Source:HGNC Symbol;Acc:HGNC:25313]
ENSSSAG00000042656 DBNL ENSG00000136279 drebrin like [Source:HGNC Symbol;Acc:HGNC:2696]
ENSSSAG00000078094 VRK2 ENSG00000028116 VRK serine/threonine kinase 2 [Source:HGNC Symbol;Acc:HGNC:12719]
ENSSSAG00000049114 MCUB ENSG00000005059 mitochondrial calcium uniporter dominant negative subunit beta [Source:HGNC Symbol;Acc:HGNC:26076]
ENSSSAG00000057363 MYO1C ENSG00000197879 myosin IC [Source:HGNC Symbol;Acc:HGNC:7597]
ENSSSAG00000115504 RNF121 ENSG00000137522 ring finger protein 121 [Source:HGNC Symbol;Acc:HGNC:21070]
ENSSSAG00000046480 PTP4A2 ENSG00000184007 protein tyrosine phosphatase 4A2 [Source:HGNC Symbol;Acc:HGNC:9635]

From an input of 155 salmon ENSEMBL gene IDs, 124 genes were converted to human orthologs.


Over-representation analysis of common DOWNREGULATED genes

ORA on common downregulated genes
common_ora_downreg_4wpc <-
  enrichGO(
    gene = common_downregulated_orthologs_tbl_4wpc$hsapiens_ensembl,
    OrgDb = 'org.Hs.eg.db',
    keyType = 'ENSEMBL',
    ont = 'BP',
    minGSSize = 10,
    maxGSSize = 1000,
    pAdjustMethod = 'BH',
    pvalueCutoff = 0.05,
    readable = T
  )

# common_downregulated_orthologs_tbl_4wpc %>% dplyr::filter(!str_detect(hsapiens_ortholog, 'N/A')) %>% pull(., hsapiens_ortholog) %>% length()
Common downregulated pathways plot
as_tibble(common_ora_downreg_4wpc) %>%
  mutate(ID = fct_reorder(ID, p.adjust)) %>%  # sorting 'ID' by Adjusted p-value
  dplyr::slice(1:20) %>%
  ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
  geom_bar(stat = "identity",
           colour = 'black',
           linewidth = 0.3) +
  coord_flip() +
  scale_fill_distiller(palette = 'Reds',
                       name = 'Adjusted \n p-value') +
                       # guide = guide_colorbar(reverse = TRUE)) +
  scale_y_continuous() +
  guides(fill = guide_colourbar(barwidth = 1, barheight = 7)) +
  labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
  theme_classic() +
  theme(axis.text.y = element_text(
    color = "black",
    size = 14,
    face = "plain"
  )) +
  theme(legend.position = 'right') +
  theme(axis.title = element_text()) +
  theme(
    text = element_text(family = 'serif', size = 14),
    plot.margin = margin(
      t = 5,
      r = 0,
      l = 0,
      b = 0
    ),
    plot.title = element_text(hjust = 0)
  )

Figure 4: Enriched pathways from common downregulated genes at 4 WPC
Common pathways term translation
as_tibble(common_ora_downreg_4wpc) %>%
  mutate(ID = fct_reorder(ID, p.adjust)) %>%  # sorting 'ID' by Adjusted p-value
  dplyr::slice(1:20) %>%
  dplyr::select(ID, Description) %>%
  map_df(., rev) %>%
  kable(
    booktabs = TRUE,
        col.names = c(
      'GO Term',
      'Description'
    ),
    align = 'l') %>%
  kableExtra::row_spec(., row = 0, italic = TRUE) %>%
  kableExtra::kable_styling(font_size = 12)
Table 2: Common genes downregulated pathways GO Term translation
GO Term Description
GO:0030335 positive regulation of cell migration
GO:2001233 regulation of apoptotic signaling pathway
GO:0043030 regulation of macrophage activation
GO:0019221 cytokine-mediated signaling pathway
GO:0002274 myeloid leukocyte activation
GO:0045058 T cell selection
GO:0097066 response to thyroid hormone
GO:0002263 cell activation involved in immune response
GO:0002366 leukocyte activation involved in immune response
GO:0007015 actin filament organization
GO:0030100 regulation of endocytosis
GO:0033627 cell adhesion mediated by integrin
GO:0030029 actin filament-based process
GO:0071402 cellular response to lipoprotein particle stimulus
GO:0050764 regulation of phagocytosis
GO:0006909 phagocytosis
GO:0030036 actin cytoskeleton organization
GO:0097067 cellular response to thyroid hormone stimulus
GO:0006897 endocytosis
GO:0007229 integrin-mediated signaling pathway
Code
downregulated_immune_common_4wpc <-
  filter_rows_by_GO_term(common_ora_downreg_4wpc, immune_related_GOterms, 'goterms')

as_tibble(downregulated_immune_common_4wpc) %>%
  dplyr::slice(1:10) %>%
  mutate(ID = fct_reorder(ID, p.adjust)) %>%
  ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
  geom_bar(
    stat = "identity",
    colour = 'black',
    linewidth = 0.3,
    width = 0.8,
    show.legend = TRUE
  ) +
  coord_flip() +
  scale_fill_distiller(palette = 'Purples',
                       name = 'Adjusted \n p-value') +
                       # guide = guide_colorbar(reverse = FALSE)) +
  scale_y_continuous() +
  labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
  theme_classic() +
  theme(axis.text.y = element_text(
    color = "black",
    size = 14,
    face = "plain"
  )) +
  theme(legend.position = 'right') +
  theme(axis.title = element_text()) +
  theme(
    text = element_text(family = 'serif', size = 14),
    plot.margin = margin(
      t = 5,
      r = 0,
      l = 0,
      b = 0
    ),
    plot.title = element_text(hjust = 0)
  )

Figure 5: Enriched immune pathways from common downregulated genes at 4 WPC
Code
as_tibble(downregulated_immune_common_4wpc) %>%
  mutate(ID = fct_reorder(ID, p.adjust)) %>%  # sorting 'ID' by Adjusted p-value
  dplyr::slice(1:10) %>%
  dplyr::select(ID, Description) %>%
  map_df(., rev) %>%
  kable(
    booktabs = TRUE,
    col.names = c('GO Term',
                  'Description'),
    align = 'l'
  ) %>%
  kableExtra::row_spec(., row = 0, italic = TRUE) %>%
  kableExtra::kable_styling(font_size = 12)
Table 3: Common genes downregulated immune pathways GO Term translation
GO Term Description
GO:0019221 cytokine-mediated signaling pathway
GO:0002274 myeloid leukocyte activation
GO:0045058 T cell selection
GO:0097066 response to thyroid hormone
GO:0002263 cell activation involved in immune response
GO:0002366 leukocyte activation involved in immune response
GO:0071402 cellular response to lipoprotein particle stimulus
GO:0050764 regulation of phagocytosis
GO:0006909 phagocytosis
GO:0097067 cellular response to thyroid hormone stimulus


Because there were five groups with more than 100 genes in the Venn diagram on Figure 3, I analyzed them all separately:

  • EOMES exclusive (382 genes)

  • GATA3 exclusive (456 genes)

  • GATA3-EOMES intersection (249 genes)

  • GATA3-EOMES-DNA vaccine intersection (148 genes)

  • GATA3-EOMES-DNA vaccine-IVHD intersection (176 genes)


Over-representation analysis of GATA3 exclusive DOWNREGULATED genes

Code
dnavaccine_4wpc_down <-
  significant_res_dnavaccine_vs_conu_4wpc[significant_res_dnavaccine_vs_conu_4wpc$log2FC < 0,]$ID
eomes_4wpc_down <-
  significant_res_eomes_vs_conu_4wpc[significant_res_eomes_vs_conu_4wpc$log2FC < 0,]$ID
ivhd_4wpc_down <-
  significant_res_ivhd_vs_conu_4wpc[significant_res_ivhd_vs_conu_4wpc$log2FC < 0, ]$ID
gata3_4wpc_down <-
  significant_res_gata3_vs_conu_4wpc[significant_res_gata3_vs_conu_4wpc$log2FC < 0, ]$ID
ivld_4wpc_down <-
  significant_res_ivld_vs_conu_4wpc[significant_res_ivld_vs_conu_4wpc$log2FC < 0, ]$ID


gata3_exclusive_down_genes_4wpc <-
  setdiff(gata3_4wpc_down,
          c(dnavaccine_4wpc_down, eomes_4wpc_down, ivhd_4wpc_down, ivld_4wpc_down))


gata3_downregulated_orthologs_4wpc <- gorth(
  query = gata3_exclusive_down_genes_4wpc,
  source_organism = 'ssalar',
  target_organism = 'hsapiens',
  mthreshold = 1,
  filter_na = F
)

gata3_downregulated_orthologs_tbl_4wpc <-
  as_tibble(gata3_downregulated_orthologs_4wpc) %>% dplyr::select(.,
                                                                 input,
                                                                 ortholog_name,
                                                                 ortholog_ensg,
                                                                 description) %>% dplyr::rename(
                                                                     .,
                                                                     ssalar_ensembl = input,
                                                                     hsapiens_ortholog = ortholog_name,
                                                                     hsapiens_ensembl = ortholog_ensg,
                                                                     description = description
                                                                   )


gata3_downregulated_orthologs_tbl_4wpc %>% head(., n = 20) %>%
  kable(
    booktabs = TRUE,
    col.names = c(
      'Salmon ENSEMBL',
      'Human ortholog',
      'Human ENSEMBL',
      'Description'
    ),
    align = 'c') %>%
  kableExtra::row_spec(., row = 0, italic = TRUE) %>%
  kableExtra::kable_styling(font_size = 12)
Table 4: GATA3 exclusive downregulated orthologs at 4WPC (first 20 rows)
Salmon ENSEMBL Human ortholog Human ENSEMBL Description
ENSSSAG00000063564 N/A N/A N/A
ENSSSAG00000049859 ADD1 ENSG00000087274 adducin 1 [Source:HGNC Symbol;Acc:HGNC:243]
ENSSSAG00000002391 KHDRBS1 ENSG00000121774 KH RNA binding domain containing, signal transduction associated 1 [Source:HGNC Symbol;Acc:HGNC:18116]
ENSSSAG00000088854 N/A N/A N/A
ENSSSAG00000043692 HNRNPAB ENSG00000197451 heterogeneous nuclear ribonucleoprotein A/B [Source:HGNC Symbol;Acc:HGNC:5034]
ENSSSAG00000107579 N/A N/A N/A
ENSSSAG00000076252 AP3B1 ENSG00000132842 adaptor related protein complex 3 subunit beta 1 [Source:HGNC Symbol;Acc:HGNC:566]
ENSSSAG00000005920 ETF1 ENSG00000120705 eukaryotic translation termination factor 1 [Source:HGNC Symbol;Acc:HGNC:3477]
ENSSSAG00000076210 N/A N/A N/A
ENSSSAG00000060562 NUBP2 ENSG00000095906 NUBP iron-sulfur cluster assembly factor 2, cytosolic [Source:HGNC Symbol;Acc:HGNC:8042]
ENSSSAG00000077073 KMT2C ENSG00000055609 lysine methyltransferase 2C [Source:HGNC Symbol;Acc:HGNC:13726]
ENSSSAG00000107241 DHX15 ENSG00000109606 DEAH-box helicase 15 [Source:HGNC Symbol;Acc:HGNC:2738]
ENSSSAG00000081700 DYNC1I2 ENSG00000077380 dynein cytoplasmic 1 intermediate chain 2 [Source:HGNC Symbol;Acc:HGNC:2964]
ENSSSAG00000066076 MAMSTR ENSG00000176909 MEF2 activating motif and SAP domain containing transcriptional regulator [Source:HGNC Symbol;Acc:HGNC:26689]
ENSSSAG00000070289 RBM39 ENSG00000131051 RNA binding motif protein 39 [Source:HGNC Symbol;Acc:HGNC:15923]
ENSSSAG00000005064 CRELD2 ENSG00000184164 cysteine rich with EGF like domains 2 [Source:HGNC Symbol;Acc:HGNC:28150]
ENSSSAG00000068484 COPB1 ENSG00000129083 COPI coat complex subunit beta 1 [Source:HGNC Symbol;Acc:HGNC:2231]
ENSSSAG00000075198 LARS1 ENSG00000133706 leucyl-tRNA synthetase 1 [Source:HGNC Symbol;Acc:HGNC:6512]
ENSSSAG00000084004 N/A N/A N/A
ENSSSAG00000105293 N/A N/A N/A
Code
gata3_ora_downreg_4wpc <-
  enrichGO(
    gene = gata3_downregulated_orthologs_tbl_4wpc$hsapiens_ensembl,
    OrgDb = 'org.Hs.eg.db',
    keyType = 'ENSEMBL',
    ont = 'BP',
    minGSSize = 10,
    maxGSSize = 1000,
    pAdjustMethod = 'BH',
    pvalueCutoff = 0.05,
    readable = T
  )

From an input of 454 salmon ENSEMBL gene IDs exclusive to GATA3, 341 genes were converted to human orthologs, and used for over-representation analysis (Figure 6). The terms enriched in Figure 6 were then filtered through the immune-related term list to plot Figure 7.

Code
as_tibble(gata3_ora_downreg_4wpc) %>%
  dplyr::slice(1:20) %>%
  mutate(ID = fct_reorder(ID, p.adjust)) %>%  # sorting 'ID' by Adjusted p-value
  ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
  geom_bar(stat = "identity",
           colour = 'black',
           linewidth = 0.3) +
  coord_flip() +
  scale_fill_distiller(palette = 'Reds',
                       name = 'Adjusted \n p-value') +
                       # guide = guide_colorbar(reverse = TRUE)) +
  scale_y_continuous() +
  guides(fill = guide_colourbar(barwidth = 1, barheight = 7)) +
  labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
  theme_classic() +

  theme(axis.text.y = element_text(
    color = "black",
    size = 14,
    face = "plain"
  )) +
  theme(legend.position = 'right') +
  theme(axis.title = element_text()) +
  theme(
    text = element_text(family = 'serif', size = 14),
    plot.margin = margin(
      t = 5,
      r = 0,
      l = 0,
      b = 0
    ),
    plot.title = element_text(hjust = 0)
  )

Figure 6: Enriched pathways from GATA3 exclusive downregulated genes at 4 WPC (top 20 pathways by adjusted p-value)


Code
as_tibble(gata3_ora_downreg_4wpc) %>%
  mutate(ID = fct_reorder(ID, p.adjust)) %>%
  dplyr::slice(1:20) %>%
  dplyr::select(ID, Description) %>%
  map_df(., rev) %>%
  kable(
    booktabs = TRUE,
    col.names = c('GO Term',
                  'Description'),
    align = 'l'
  ) %>%
  kableExtra::row_spec(., row = 0, italic = TRUE) %>%
  kableExtra::kable_styling(font_size = 12)
Table 5: GATA3 exclusive downregulated pathways GO Term translation
GO Term Description
GO:0006457 protein folding
GO:1902904 negative regulation of supramolecular fiber organization
GO:0018193 peptidyl-amino acid modification
GO:0006888 endoplasmic reticulum to Golgi vesicle-mediated transport
GO:0042149 cellular response to glucose starvation
GO:0051494 negative regulation of cytoskeleton organization
GO:0030098 lymphocyte differentiation
GO:0010762 regulation of fibroblast migration
GO:0048193 Golgi vesicle transport
GO:0007169 transmembrane receptor protein tyrosine kinase signaling pathway
GO:0031333 negative regulation of protein-containing complex assembly
GO:0010761 fibroblast migration
GO:0006983 ER overload response
GO:0006886 intracellular protein transport
GO:0030029 actin filament-based process
GO:0007015 actin filament organization
GO:0030036 actin cytoskeleton organization
GO:0010639 negative regulation of organelle organization
GO:0043254 regulation of protein-containing complex assembly
GO:0051129 negative regulation of cellular component organization
Code
downregulated_immune_gata3_4wpc <-
  filter_rows_by_GO_term(gata3_ora_downreg_4wpc, immune_related_GOterms, 'goterms')

as_tibble(downregulated_immune_gata3_4wpc) %>%
  dplyr::slice(1:10) %>%
  mutate(ID = fct_reorder(ID, p.adjust)) %>%
  ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
  geom_bar(
    stat = "identity",
    colour = 'black',
    linewidth = 0.3,
    width = 0.8,
    show.legend = TRUE
  ) +
  coord_flip() +
  scale_fill_distiller(palette = 'Purples',
                       name = 'Adjusted \n p-value',
                       guide = guide_colorbar(reverse = FALSE)) +
  scale_y_continuous() +
  labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
  theme_classic() +
  theme(axis.text.y = element_text(
    color = "black",
    size = 14,
    face = "plain"
  )) +
  theme(legend.position = 'right') +
  theme(axis.title = element_text()) +
  theme(
    text = element_text(family = 'serif', size = 14),
    plot.margin = margin(
      t = 5,
      r = 0,
      l = 0,
      b = 0
    ),
    plot.title = element_text(hjust = 0)
  )

Figure 7: Enriched immune pathways from GATA3 exclusive downregulated genes at 4 WPC (top 10 pathways by adjusted p-value)
Code
as_tibble(downregulated_immune_gata3_4wpc) %>%
  mutate(ID = fct_reorder(ID, p.adjust)) %>%
  dplyr::slice(1:10) %>%
  dplyr::select(ID, Description) %>%
  map_df(., rev) %>%
  kable(
    booktabs = TRUE,
    col.names = c('GO Term',
                  'Description'),
    align = 'l'
  ) %>%
  kableExtra::row_spec(., row = 0, italic = TRUE) %>%
  kableExtra::kable_styling(font_size = 12)
Table 6: GATA3 exclusive downregulated immune pathways GO Term translation
GO Term Description
GO:0032868 response to insulin
GO:0010035 response to inorganic substance
GO:0030217 T cell differentiation
GO:0071496 cellular response to external stimulus
GO:1903131 mononuclear cell differentiation
GO:0002521 leukocyte differentiation
GO:0009267 cellular response to starvation
GO:0032869 cellular response to insulin stimulus
GO:0030098 lymphocyte differentiation
GO:0006983 ER overload response

Over-representation analysis of EOMES exclusive DOWNREGULATED genes


Code
eomes_exclusive_down_genes_4wpc <-
  setdiff(eomes_4wpc_down,
          c(dnavaccine_4wpc_down, ivhd_4wpc_down, gata3_4wpc_down, ivld_4wpc_down))

eomes_down_orthologs_4wpc <- gorth(
  query = eomes_exclusive_down_genes_4wpc,
  source_organism = 'ssalar',
  target_organism = 'hsapiens',
  mthreshold = 1,
  filter_na = F
)

eomes_down_orthologs_tbl_4wpc <-
  as_tibble(eomes_down_orthologs_4wpc) %>% dplyr::select(.,
                                                                 input,
                                                                 ortholog_name,
                                                                 ortholog_ensg,
                                                                 description) %>% dplyr::rename(
                                                                     .,
                                                                     ssalar_ensembl = input,
                                                                     hsapiens_ortholog = ortholog_name,
                                                                     hsapiens_ensembl = ortholog_ensg,
                                                                     description = description
                                                                   )


eomes_down_orthologs_tbl_4wpc %>% head(., n = 20) %>%
  kable(
    booktabs = TRUE,
    col.names = c(
      'Salmon ENSEMBL',
      'Human ortholog',
      'Human ENSEMBL',
      'Description'
    ),
    align = 'c') %>%
  kableExtra::row_spec(., row = 0, italic = TRUE) %>%
  kableExtra::kable_styling(font_size = 12)
Table 7: EOMES exclusive downregulated orthologs at 4WPC (first 20 rows)
Salmon ENSEMBL Human ortholog Human ENSEMBL Description
ENSSSAG00000094153 N/A N/A N/A
ENSSSAG00000082595 N/A N/A N/A
ENSSSAG00000007854 ATP6AP2 ENSG00000182220 ATPase H+ transporting accessory protein 2 [Source:HGNC Symbol;Acc:HGNC:18305]
ENSSSAG00000107744 SFXN3 ENSG00000107819 sideroflexin 3 [Source:HGNC Symbol;Acc:HGNC:16087]
ENSSSAG00000002903 ADAM33 ENSG00000149451 ADAM metallopeptidase domain 33 [Source:HGNC Symbol;Acc:HGNC:15478]
ENSSSAG00000104948 N/A N/A N/A
ENSSSAG00000099571 N/A N/A N/A
ENSSSAG00000063098 YWHAQ ENSG00000134308 tyrosine 3-monooxygenase/tryptophan 5-monooxygenase activation protein theta [Source:HGNC Symbol;Acc:HGNC:12854]
ENSSSAG00000005645 N/A N/A N/A
ENSSSAG00000075127 AP2M1 ENSG00000161203 adaptor related protein complex 2 subunit mu 1 [Source:HGNC Symbol;Acc:HGNC:564]
ENSSSAG00000056674 N/A N/A N/A
ENSSSAG00000066201 GLUD1 ENSG00000148672 glutamate dehydrogenase 1 [Source:HGNC Symbol;Acc:HGNC:4335]
ENSSSAG00000119962 N/A N/A N/A
ENSSSAG00000041622 ZYX ENSG00000159840 zyxin [Source:HGNC Symbol;Acc:HGNC:13200]
ENSSSAG00000089095 CLTA ENSG00000122705 clathrin light chain A [Source:HGNC Symbol;Acc:HGNC:2090]
ENSSSAG00000114855 N/A N/A N/A
ENSSSAG00000010060 STAT3 ENSG00000168610 signal transducer and activator of transcription 3 [Source:HGNC Symbol;Acc:HGNC:11364]
ENSSSAG00000008813 IPO5 ENSG00000065150 importin 5 [Source:HGNC Symbol;Acc:HGNC:6402]
ENSSSAG00000067990 PSMC4 ENSG00000013275 proteasome 26S subunit, ATPase 4 [Source:HGNC Symbol;Acc:HGNC:9551]
ENSSSAG00000112907 RBM8A ENSG00000265241 RNA binding motif protein 8A [Source:HGNC Symbol;Acc:HGNC:9905]
Code
eomes_ora_downreg_4wpc <-
  enrichGO(
    gene = eomes_down_orthologs_tbl_4wpc$hsapiens_ensembl,
    OrgDb = 'org.Hs.eg.db',
    keyType = 'ENSEMBL',
    ont = 'BP',
    minGSSize = 10,
    maxGSSize = 1000,
    pAdjustMethod = 'BH',
    pvalueCutoff = 0.05,
    readable = T
  )

From an input of 382 salmon ENSEMBL gene IDs exclusive to EOMES, 278 genes were converted to human orthologs.

These 278 genes were used for over-representation analysis Figure 8, and filtered by immune-related term in Figure 9.

Code
as_tibble(eomes_ora_downreg_4wpc) %>%
  dplyr::slice(1:20) %>%
  mutate(ID = fct_reorder(ID, p.adjust)) %>%  # sorting 'Description' by Adjusted p-value
  ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
  geom_bar(stat = "identity",
           colour = 'black',
           linewidth = 0.3) +
  coord_flip() +
  scale_fill_distiller(palette = 'Reds',
                       name = 'Adjusted \n p-value',
                       guide = guide_colorbar(reverse = TRUE)) +
  scale_y_continuous() +
  guides(fill = guide_colourbar(barwidth = 1, barheight = 7)) +
  labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
  theme_classic() +

  theme(axis.text.y = element_text(
    color = "black",
    size = 14,
    face = "plain"
  )) +
  theme(legend.position = 'right') +
  theme(axis.title = element_text()) +
  theme(
    text = element_text(family = 'serif', size = 14),
    plot.margin = margin(
      t = 5,
      r = 0,
      l = 0,
      b = 0
    ),
    plot.title = element_text(hjust = 0)
  )

Figure 8: Enriched pathways from EOMES exclusive downregulated genes at 4 WPC
Code
as_tibble(eomes_ora_downreg_4wpc) %>%
  mutate(ID = fct_reorder(ID, p.adjust)) %>%
  dplyr::slice(1:20) %>%
  map_df(., rev) %>%
  dplyr::select(ID, Description) %>%
  kable(
    booktabs = TRUE,
    col.names = c('GO Term',
                  'Description'),
    align = 'l'
  ) %>%
  kableExtra::row_spec(., row = 0, italic = TRUE) %>%
  kableExtra::kable_styling(font_size = 12)
Table 8: EOMES exclusive downregulated pathways GO Term translation
GO Term Description
GO:0030029 actin filament-based process
GO:1903050 regulation of proteolysis involved in protein catabolic process
GO:0071453 cellular response to oxygen levels
GO:0019752 carboxylic acid metabolic process
GO:0030162 regulation of proteolysis
GO:0007015 actin filament organization
GO:0060249 anatomical structure homeostasis
GO:0001894 tissue homeostasis
GO:2001242 regulation of intrinsic apoptotic signaling pathway
GO:0048871 multicellular organismal-level homeostasis
GO:0071604 transforming growth factor beta production
GO:0006984 ER-nucleus signaling pathway
GO:0006886 intracellular protein transport
GO:0034976 response to endoplasmic reticulum stress
GO:0010498 proteasomal protein catabolic process
GO:0071634 regulation of transforming growth factor beta production
GO:0006511 ubiquitin-dependent protein catabolic process
GO:0043632 modification-dependent macromolecule catabolic process
GO:0019941 modification-dependent protein catabolic process
GO:0051603 proteolysis involved in protein catabolic process
Code
downregulated_immune_eomes_4wpc <-
  filter_rows_by_GO_term(eomes_ora_downreg_4wpc, immune_related_GOterms, 'goterms')

as_tibble(downregulated_immune_eomes_4wpc) %>%
  dplyr::slice(1:10) %>%
  mutate(ID = fct_reorder(ID, p.adjust)) %>%  # sorting 'Description' by Adjusted p-value
  ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
  geom_bar(stat = "identity",
           colour = 'black',
           linewidth = 0.3) +
  coord_flip() +
  scale_fill_distiller(palette = 'Purples',
                       name = 'Adjusted \n p-value',
                       guide = guide_colorbar(reverse = TRUE)) +
  scale_y_continuous() +
  guides(fill = guide_colourbar(barwidth = 1, barheight = 7)) +
  labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
  theme_classic() +

  theme(axis.text.y = element_text(
    color = "black",
    size = 14,
    face = "plain"
  )) +
  theme(legend.position = 'right') +
  theme(axis.title = element_text()) +
  theme(
    text = element_text(family = 'serif', size = 14),
    plot.margin = margin(
      t = 5,
      r = 0,
      l = 0,
      b = 0
    ),
    plot.title = element_text(hjust = 0)
  )

Figure 9: Enriched immune pathways from EOMES exclusive downregulated genes at 4 WPC
Code
as_tibble(downregulated_immune_eomes_4wpc) %>%
  mutate(ID = fct_reorder(ID, p.adjust)) %>%
  dplyr::slice(1:10) %>%
  dplyr::select(ID, Description) %>%
  map_df(., rev) %>% 
  kable(
    booktabs = TRUE,
        col.names = c(
      'GO Term',
      'Description'
    ),
    align = 'l') %>%
  kableExtra::row_spec(., row = 0, italic = TRUE) %>%
  kableExtra::kable_styling(font_size = 12)
Table 9: EOMES exclusive downregulated pathways GO Term translation
GO Term Description
GO:0002363 alpha-beta T cell lineage commitment
GO:0010039 response to iron ion
GO:0009611 response to wounding
GO:2001243 negative regulation of intrinsic apoptotic signaling pathway
GO:0071453 cellular response to oxygen levels
GO:2001242 regulation of intrinsic apoptotic signaling pathway

Over-representation analysis of GATA3-EOMES intersection DOWNREGULATED genes

Code
intersection <- intersect(gata3_4wpc_down, eomes_4wpc_down)

# length(intersection)

gata3EOMES_exclusive_genes_down <- setdiff(intersection,
                                           c(dnavaccine_4wpc_down, ivhd_4wpc_down, ivld_4wpc_down))

# length(gata3EOMES_exclusive_genes_down)

gata3EOMES_down_orthologs_4wpc <- gorth(
  query = gata3EOMES_exclusive_genes_down,
  source_organism = 'ssalar',
  target_organism = 'hsapiens',
  mthreshold = 1,
  filter_na = F
)

# head(gata3EOMES_down_orthologs_4wpc)
# nrow(gata3EOMES_down_orthologs_4wpc)

gata3EOMES_intersection_down_orthologs_tbl_4wpc <-
  as_tibble(gata3EOMES_down_orthologs_4wpc) %>% dplyr::select(.,
                                                              input,
                                                              ortholog_name,
                                                              ortholog_ensg,
                                                              description) %>% dplyr::rename(
                                                                .,
                                                                ssalar_ensembl = input,
                                                                hsapiens_ortholog = ortholog_name,
                                                                hsapiens_ensembl = ortholog_ensg,
                                                                description = description
                                                                )


gata3EOMES_intersection_down_orthologs_tbl_4wpc %>% head(., n = 20) %>%
  kable(
    booktabs = TRUE,
    col.names = c(
      'Salmon ENSEMBL',
      'Human ortholog',
      'Human ENSEMBL',
      'Description'
    ),
    align = 'c') %>%
  kableExtra::row_spec(., row = 0, italic = TRUE) %>%
  kableExtra::kable_styling(font_size = 12)
Table 10: GATA3-EOMES intersection downregulated orthologs at 4WPC (first 20 rows)
Salmon ENSEMBL Human ortholog Human ENSEMBL Description
ENSSSAG00000080819 N/A N/A N/A
ENSSSAG00000088119 DNAJA2 ENSG00000069345 DnaJ heat shock protein family (Hsp40) member A2 [Source:HGNC Symbol;Acc:HGNC:14884]
ENSSSAG00000041182 ADRM1 ENSG00000130706 ADRM1 26S proteasome ubiquitin receptor [Source:HGNC Symbol;Acc:HGNC:15759]
ENSSSAG00000073163 N/A N/A N/A
ENSSSAG00000063958 PSMD12 ENSG00000197170 proteasome 26S subunit, non-ATPase 12 [Source:HGNC Symbol;Acc:HGNC:9557]
ENSSSAG00000104475 CWC25 ENSG00000273559 CWC25 spliceosome associated protein homolog [Source:HGNC Symbol;Acc:HGNC:25989]
ENSSSAG00000101565 FAM3C ENSG00000196937 FAM3 metabolism regulating signaling molecule C [Source:HGNC Symbol;Acc:HGNC:18664]
ENSSSAG00000043582 PARP1 ENSG00000143799 poly(ADP-ribose) polymerase 1 [Source:HGNC Symbol;Acc:HGNC:270]
ENSSSAG00000069956 PSMD6 ENSG00000163636 proteasome 26S subunit, non-ATPase 6 [Source:HGNC Symbol;Acc:HGNC:9564]
ENSSSAG00000077285 MYO6 ENSG00000196586 myosin VI [Source:HGNC Symbol;Acc:HGNC:7605]
ENSSSAG00000054039 SURF4 ENSG00000148248 surfeit 4 [Source:HGNC Symbol;Acc:HGNC:11476]
ENSSSAG00000047797 ANXA6 ENSG00000197043 annexin A6 [Source:HGNC Symbol;Acc:HGNC:544]
ENSSSAG00000092486 UBE2L3 ENSG00000185651 ubiquitin conjugating enzyme E2 L3 [Source:HGNC Symbol;Acc:HGNC:12488]
ENSSSAG00000004995 PKN1 ENSG00000123143 protein kinase N1 [Source:HGNC Symbol;Acc:HGNC:9405]
ENSSSAG00000076429 GRN ENSG00000030582 granulin precursor [Source:HGNC Symbol;Acc:HGNC:4601]
ENSSSAG00000060487 CD81 ENSG00000110651 CD81 molecule [Source:HGNC Symbol;Acc:HGNC:1701]
ENSSSAG00000055328 CREB3L2 ENSG00000182158 cAMP responsive element binding protein 3 like 2 [Source:HGNC Symbol;Acc:HGNC:23720]
ENSSSAG00000077303 PSMD2 ENSG00000175166 proteasome 26S subunit ubiquitin receptor, non-ATPase 2 [Source:HGNC Symbol;Acc:HGNC:9559]
ENSSSAG00000042378 UBA7 ENSG00000182179 ubiquitin like modifier activating enzyme 7 [Source:HGNC Symbol;Acc:HGNC:12471]
ENSSSAG00000034990 PSMA3 ENSG00000100567 proteasome 20S subunit alpha 3 [Source:HGNC Symbol;Acc:HGNC:9532]
Code
GATA3eomes_intersection_ora_downreg_4wpc <-
  enrichGO(
    gene = gata3EOMES_intersection_down_orthologs_tbl_4wpc$hsapiens_ensembl,
    OrgDb = 'org.Hs.eg.db',
    keyType = 'ENSEMBL',
    ont = 'BP',
    minGSSize = 10,
    maxGSSize = 1000,
    pAdjustMethod = 'BH',
    pvalueCutoff = 0.05,
    readable = T
  )

From an input of 244 salmon ENSEMBL gene IDs exclusive to EOMES, 181 genes were converted to human orthologs.

These 184 genes were used for over-representation analysis Figure 10, and filtered by immune-related term in Figure 11.

Code
as_tibble(GATA3eomes_intersection_ora_downreg_4wpc) %>%
  dplyr::slice(1:20) %>%
  mutate(ID = fct_reorder(ID, p.adjust)) %>%  # sorting 'ID' by Adjusted p-value
  ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
  geom_bar(stat = "identity",
           colour = 'black',
           linewidth = 0.3) +
  coord_flip() +
  scale_fill_distiller(palette = 'Reds',
                       name = 'Adjusted \n p-value',
                       guide = guide_colorbar(reverse = TRUE)) +
  scale_y_continuous() +
  guides(fill = guide_colourbar(barwidth = 1, barheight = 7)) +
  labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
  theme_classic() +

  theme(axis.text.y = element_text(
    color = "black",
    size = 14,
    face = "plain"
  )) +
  theme(legend.position = 'right') +
  theme(axis.title = element_text()) +
  theme(
    text = element_text(family = 'serif', size = 14),
    plot.margin = margin(
      t = 5,
      r = 0,
      l = 0,
      b = 0
    ),
    plot.title = element_text(hjust = 0)
  )

Figure 10: Enriched pathways from GATA3-EOMES intersection downregulated genes at 4 WPC (top 20 pathways by adjusted p-value)
Code
as_tibble(GATA3eomes_intersection_ora_downreg_4wpc) %>%
  mutate(ID = fct_reorder(ID, p.adjust)) %>%
  dplyr::slice(1:20) %>%
  dplyr::select(ID, Description) %>%
  map_df(., rev) %>% 
  kable(
    booktabs = TRUE,
        col.names = c(
      'GO Term',
      'Description'
    ),
    align = 'l') %>%
  kableExtra::row_spec(., row = 0, italic = TRUE) %>%
  kableExtra::kable_styling(font_size = 12)
Table 11: GATA3-EOMES intersection downregulated pathways GO Term translation
GO Term Description
GO:0006897 endocytosis
GO:2001233 regulation of apoptotic signaling pathway
GO:0035966 response to topologically incorrect protein
GO:0097190 apoptotic signaling pathway
GO:0006909 phagocytosis
GO:1903131 mononuclear cell differentiation
GO:0006986 response to unfolded protein
GO:0140467 integrated stress response signaling
GO:0010498 proteasomal protein catabolic process
GO:0002573 myeloid leukocyte differentiation
GO:0043065 positive regulation of apoptotic process
GO:0002521 leukocyte differentiation
GO:0006457 protein folding
GO:0043068 positive regulation of programmed cell death
GO:0030099 myeloid cell differentiation
GO:0051604 protein maturation
GO:0006511 ubiquitin-dependent protein catabolic process
GO:0043632 modification-dependent macromolecule catabolic process
GO:0019941 modification-dependent protein catabolic process
GO:0051603 proteolysis involved in protein catabolic process
Code
downregulated_immune_GATA3eomes_4wpc <-
  filter_rows_by_GO_term(GATA3eomes_intersection_ora_downreg_4wpc, immune_related_GOterms, 'goterms')

as_tibble(downregulated_immune_GATA3eomes_4wpc) %>%
  dplyr::slice(1:10) %>%
  mutate(ID = fct_reorder(ID, p.adjust)) %>%  # sorting 'Description' by Adjusted p-value
  ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
  geom_bar(stat = "identity",
           colour = 'black',
           linewidth = 0.3) +
  coord_flip() +
  scale_fill_distiller(palette = 'Purples',
                       name = 'Adjusted \n p-value',
                       guide = guide_colorbar(reverse = TRUE)) +
  scale_y_continuous() +
  guides(fill = guide_colourbar(barwidth = 1, barheight = 7)) +
  labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
  theme_classic() +

  theme(axis.text.y = element_text(
    color = "black",
    size = 14,
    face = "plain"
  )) +
  theme(legend.position = 'right') +
  theme(axis.title = element_text()) +
  theme(
    text = element_text(family = 'serif', size = 14),
    plot.margin = margin(
      t = 5,
      r = 0,
      l = 0,
      b = 0
    ),
    plot.title = element_text(hjust = 0)
  )

Figure 11: Enriched immune pathways from GATA3-EOMES intersection downregulated genes at 4 WPC (top 10 pathways by adjusted p-value)
Code
as_tibble(downregulated_immune_GATA3eomes_4wpc) %>%
  dplyr::slice(1:10) %>%
  mutate(ID = fct_reorder(ID, p.adjust)) %>%
  dplyr::select(ID, Description) %>%
  map_df(., rev) %>% 
  kable(
    booktabs = TRUE,
        col.names = c(
      'GO Term',
      'Description'
    ),
    align = 'l') %>%
  kableExtra::row_spec(., row = 0, italic = TRUE) %>%
  kableExtra::kable_styling(font_size = 12)
Table 12: GATA3-EOMES intersection downregulated pathways immune GO Term translation
GO Term Description
GO:0001767 establishment of lymphocyte polarity
GO:0042110 T cell activation
GO:0071216 cellular response to biotic stimulus
GO:0097190 apoptotic signaling pathway
GO:0006909 phagocytosis
GO:1903131 mononuclear cell differentiation
GO:0002573 myeloid leukocyte differentiation
GO:0043065 positive regulation of apoptotic process
GO:0002521 leukocyte differentiation
GO:0030099 myeloid cell differentiation

Over-representation analysis of GATA3-EOMES-DNA vaccine intersection DOWNREGULATED genes

Code
gata3_eomes_dnavaccine_intersection <- setdiff(Reduce(
  intersect,
  list(gata3_4wpc_down, eomes_4wpc_down, dnavaccine_4wpc_down)
),
union(ivhd_4wpc_down, ivld_4wpc_down))


gata3_eomes_dnavaccine_intersection_orthologs <- gorth(
  query = gata3_eomes_dnavaccine_intersection,
  source_organism = 'ssalar',
  target_organism = 'hsapiens',
  mthreshold = 1,
  filter_na = F
)

# head(gata3EOMES_down_orthologs_4wpc)
# nrow(gata3EOMES_down_orthologs_4wpc)

gata3_eomes_dnavaccine_intersection_tbl_orthologs <-
  as_tibble(gata3_eomes_dnavaccine_intersection_orthologs) %>% dplyr::select(.,
                                                              input,
                                                              ortholog_name,
                                                              ortholog_ensg,
                                                              description) %>% dplyr::rename(
                                                                .,
                                                                ssalar_ensembl = input,
                                                                hsapiens_ortholog = ortholog_name,
                                                                hsapiens_ensembl = ortholog_ensg,
                                                                description = description
                                                                )


gata3_eomes_dnavaccine_intersection_tbl_orthologs %>% head(., n = 20) %>%
  kable(
    booktabs = TRUE,
    col.names = c(
      'Salmon ENSEMBL',
      'Human ortholog',
      'Human ENSEMBL',
      'Description'
    ),
    align = 'c') %>%
  kableExtra::row_spec(., row = 0, italic = TRUE) %>%
  kableExtra::kable_styling(font_size = 12)
Table 13: GATA3-EOMES-DNA vaccine intersection downregulated orthologs at 4WPC (first 20 rows)
Salmon ENSEMBL Human ortholog Human ENSEMBL Description
ENSSSAG00000041751 CCT7 ENSG00000135624 chaperonin containing TCP1 subunit 7 [Source:HGNC Symbol;Acc:HGNC:1622]
ENSSSAG00000073238 DBNL ENSG00000136279 drebrin like [Source:HGNC Symbol;Acc:HGNC:2696]
ENSSSAG00000066748 NCK1 ENSG00000158092 NCK adaptor protein 1 [Source:HGNC Symbol;Acc:HGNC:7664]
ENSSSAG00000097439 YIPF5 ENSG00000145817 Yip1 domain family member 5 [Source:HGNC Symbol;Acc:HGNC:24877]
ENSSSAG00000007250 TXNRD3 ENSG00000197763 thioredoxin reductase 3 [Source:HGNC Symbol;Acc:HGNC:20667]
ENSSSAG00000110550 UBE2L3 ENSG00000185651 ubiquitin conjugating enzyme E2 L3 [Source:HGNC Symbol;Acc:HGNC:12488]
ENSSSAG00000038979 UBR5 ENSG00000104517 ubiquitin protein ligase E3 component n-recognin 5 [Source:HGNC Symbol;Acc:HGNC:16806]
ENSSSAG00000005324 ACSL3 ENSG00000123983 acyl-CoA synthetase long chain family member 3 [Source:HGNC Symbol;Acc:HGNC:3570]
ENSSSAG00000064191 MSN ENSG00000147065 moesin [Source:HGNC Symbol;Acc:HGNC:7373]
ENSSSAG00000120873 N/A N/A N/A
ENSSSAG00000054182 OS9 ENSG00000135506 OS9 endoplasmic reticulum lectin [Source:HGNC Symbol;Acc:HGNC:16994]
ENSSSAG00000099211 SEPTIN12 ENSG00000140623 septin 12 [Source:HGNC Symbol;Acc:HGNC:26348]
ENSSSAG00000076111 KARS1 ENSG00000065427 lysyl-tRNA synthetase 1 [Source:HGNC Symbol;Acc:HGNC:6215]
ENSSSAG00000115363 FBXO46 ENSG00000177051 F-box protein 46 [Source:HGNC Symbol;Acc:HGNC:25069]
ENSSSAG00000113980 CD79A ENSG00000105369 CD79a molecule [Source:HGNC Symbol;Acc:HGNC:1698]
ENSSSAG00000063086 LYPLA2 ENSG00000011009 lysophospholipase 2 [Source:HGNC Symbol;Acc:HGNC:6738]
ENSSSAG00000115883 N/A N/A N/A
ENSSSAG00000064407 MAP3K14 ENSG00000006062 mitogen-activated protein kinase kinase kinase 14 [Source:HGNC Symbol;Acc:HGNC:6853]
ENSSSAG00000109961 EHD2 ENSG00000024422 EH domain containing 2 [Source:HGNC Symbol;Acc:HGNC:3243]
ENSSSAG00000064296 AKAP10 ENSG00000108599 A-kinase anchoring protein 10 [Source:HGNC Symbol;Acc:HGNC:368]
Code
GATA3eomesDNAVACCINE_intersection_ora_downreg_4wpc <-
  enrichGO(
    gene = gata3_eomes_dnavaccine_intersection_tbl_orthologs$hsapiens_ensembl,
    OrgDb = 'org.Hs.eg.db',
    keyType = 'ENSEMBL',
    ont = 'BP',
    minGSSize = 10,
    maxGSSize = 1000,
    pAdjustMethod = 'BH',
    pvalueCutoff = 0.05,
    readable = T
  )

The 145 salmon ENSEMBL gene IDs from the intersection between GATA3, EOMES, and DNA vaccine, were converted to 110 human orthologs.

These 112 genes were used for over-representation analysis Figure 12, and filtered by immune-related term in Figure 13.

Code
as_tibble(GATA3eomesDNAVACCINE_intersection_ora_downreg_4wpc) %>%
  dplyr::slice(1:20) %>%
  mutate(ID = fct_reorder(ID, p.adjust)) %>%  # sorting 'Description' by Adjusted p-value
  ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
  geom_bar(stat = "identity",
           colour = 'black',
           linewidth = 0.3) +
  coord_flip() +
  scale_fill_distiller(palette = 'Reds',
                       name = 'Adjusted \n p-value',
                       guide = guide_colorbar(reverse = TRUE)) +
  scale_y_continuous() +
  guides(fill = guide_colourbar(barwidth = 1, barheight = 7)) +
  labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
  theme_classic() +

  theme(axis.text.y = element_text(
    color = "black",
    size = 14,
    face = "plain"
  )) +
  theme(legend.position = 'right') +
  theme(axis.title = element_text()) +
  theme(
    text = element_text(family = 'serif', size = 14),
    plot.margin = margin(
      t = 5,
      r = 0,
      l = 0,
      b = 0
    ),
    plot.title = element_text(hjust = 0)
  )

Figure 12: Enriched pathways from GATA3-EOMES-DNA vaccine intersection downregulated genes at 4 WPC (top 20 pathways by adjusted p-value)
Code
as_tibble(GATA3eomesDNAVACCINE_intersection_ora_downreg_4wpc) %>%
  mutate(ID = fct_reorder(ID, p.adjust)) %>%
  dplyr::slice(1:20) %>%
  dplyr::select(ID, Description) %>%
  map_df(., rev) %>% 
  kable(
    booktabs = TRUE,
        col.names = c(
      'GO Term',
      'Description'
    ),
    align = 'l') %>%
  kableExtra::row_spec(., row = 0, italic = TRUE) %>%
  kableExtra::kable_styling(font_size = 12)
Table 14: GATA3-EOMES-DNA vaccine intersection GO Term translation
GO Term Description
GO:0002274 myeloid leukocyte activation
GO:0032386 regulation of intracellular transport
GO:0002694 regulation of leukocyte activation
GO:1903829 positive regulation of protein localization
GO:0022604 regulation of cell morphogenesis
GO:0030036 actin cytoskeleton organization
GO:0002431 Fc receptor mediated stimulatory signaling pathway
GO:0038094 Fc-gamma receptor signaling pathway
GO:0002252 immune effector process
GO:0008360 regulation of cell shape
GO:0032880 regulation of protein localization
GO:0038093 Fc receptor signaling pathway
GO:0038096 Fc-gamma receptor signaling pathway involved in phagocytosis
GO:0002433 immune response-regulating cell surface receptor signaling pathway involved in phagocytosis
GO:0050865 regulation of cell activation
GO:0070661 leukocyte proliferation
GO:0032943 mononuclear cell proliferation
GO:0046651 lymphocyte proliferation
GO:0006909 phagocytosis
GO:0006897 endocytosis
Code
downregulated_immune_GATA3eomesDNAVACCINE_4wpc <-
  filter_rows_by_GO_term(GATA3eomesDNAVACCINE_intersection_ora_downreg_4wpc, immune_related_GOterms, 'goterms')

as_tibble(downregulated_immune_GATA3eomesDNAVACCINE_4wpc) %>%
  dplyr::slice(1:10) %>%
  mutate(ID = fct_reorder(ID, p.adjust)) %>%  # sorting 'Description' by Adjusted p-value
  ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
  geom_bar(stat = "identity",
           colour = 'black',
           linewidth = 0.3) +
  coord_flip() +
  scale_fill_distiller(palette = 'Purples',
                       name = 'Adjusted \n p-value',
                       guide = guide_colorbar(reverse = TRUE)) +
  scale_y_continuous() +
  guides(fill = guide_colourbar(barwidth = 1, barheight = 7)) +
  labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
  theme_classic() +

  theme(axis.text.y = element_text(
    color = "black",
    size = 14,
    face = "plain"
  )) +
  theme(legend.position = 'right') +
  theme(axis.title = element_text()) +
  theme(
    text = element_text(family = 'serif', size = 14),
    plot.margin = margin(
      t = 5,
      r = 0,
      l = 0,
      b = 0
    ),
    plot.title = element_text(hjust = 0)
  )

Figure 13: Enriched immune pathways from GATA3-EOMES-DNA vaccine intersection downregulated genes at 4 WPC (top 10 pathways by adjusted p-value)
Code
as_tibble(downregulated_immune_GATA3eomesDNAVACCINE_4wpc) %>%
  mutate(ID = fct_reorder(ID, p.adjust)) %>%
  dplyr::slice(1:10) %>%
  dplyr::select(ID, Description) %>%
  map_df(., rev) %>% 
  kable(
    booktabs = TRUE,
        col.names = c(
      'GO Term',
      'Description'
    ),
    align = 'l') %>%
  kableExtra::row_spec(., row = 0, italic = TRUE) %>%
  kableExtra::kable_styling(font_size = 12)
Table 15: GATA3-EOMES-DNA vaccine intersection (immune) GO Term translation
GO Term Description
GO:0043299 leukocyte degranulation
GO:0002274 myeloid leukocyte activation
GO:0002694 regulation of leukocyte activation
GO:0002252 immune effector process
GO:0038096 Fc-gamma receptor signaling pathway involved in phagocytosis
GO:0002433 immune response-regulating cell surface receptor signaling pathway involved in phagocytosis
GO:0070661 leukocyte proliferation
GO:0032943 mononuclear cell proliferation
GO:0046651 lymphocyte proliferation
GO:0006909 phagocytosis

Over-representation analysis of GATA3-EOMES-DNA vaccine-IVHD intersection DOWNREGULATED genes

Code
intersectionGEDI <- Reduce(intersect, list(gata3_4wpc_down, eomes_4wpc_down, dnavaccine_4wpc_down, ivhd_4wpc_down))
gata3_eomes_dnavaccine_ivhd_intersection <- setdiff(intersectionGEDI, ivld_4wpc_down)


gata3_eomes_dnavaccine_ivhd_intersection_orthologs <- gorth(
  query = gata3_eomes_dnavaccine_ivhd_intersection,
  source_organism = 'ssalar',
  target_organism = 'hsapiens',
  mthreshold = 1,
  filter_na = F
)

gata3_eomes_dnavaccine_ivhd_intersection_tbl_orthologs <-
  as_tibble(gata3_eomes_dnavaccine_ivhd_intersection_orthologs) %>% dplyr::select(.,
                                                              input,
                                                              ortholog_name,
                                                              ortholog_ensg,
                                                              description) %>% dplyr::rename(
                                                                .,
                                                                ssalar_ensembl = input,
                                                                hsapiens_ortholog = ortholog_name,
                                                                hsapiens_ensembl = ortholog_ensg,
                                                                description = description
                                                                )


gata3_eomes_dnavaccine_ivhd_intersection_tbl_orthologs %>% head(., n = 20) %>%
  kable(
    booktabs = TRUE,
    col.names = c(
      'Salmon ENSEMBL',
      'Human ortholog',
      'Human ENSEMBL',
      'Description'
    ),
    align = 'c') %>%
  kableExtra::row_spec(., row = 0, italic = TRUE) %>%
  kableExtra::kable_styling(font_size = 12)
Table 16: GATA3-EOMES-DNA vaccine-IVHD intersection downregulated orthologs at 4WPC (first 20 rows)
Salmon ENSEMBL Human ortholog Human ENSEMBL Description
ENSSSAG00000051579 N/A N/A N/A
ENSSSAG00000002480 HSP90AB1 ENSG00000096384 heat shock protein 90 alpha family class B member 1 [Source:HGNC Symbol;Acc:HGNC:5258]
ENSSSAG00000045085 CRLF3 ENSG00000176390 cytokine receptor like factor 3 [Source:HGNC Symbol;Acc:HGNC:17177]
ENSSSAG00000004180 SAT1 ENSG00000130066 spermidine/spermine N1-acetyltransferase 1 [Source:HGNC Symbol;Acc:HGNC:10540]
ENSSSAG00000002265 COPB2 ENSG00000184432 COPI coat complex subunit beta 2 [Source:HGNC Symbol;Acc:HGNC:2232]
ENSSSAG00000043619 EIF3J ENSG00000104131 eukaryotic translation initiation factor 3 subunit J [Source:HGNC Symbol;Acc:HGNC:3270]
ENSSSAG00000003377 RAB5C ENSG00000108774 RAB5C, member RAS oncogene family [Source:HGNC Symbol;Acc:HGNC:9785]
ENSSSAG00000041828 JUP ENSG00000173801 junction plakoglobin [Source:HGNC Symbol;Acc:HGNC:6207]
ENSSSAG00000102439 ZYX ENSG00000159840 zyxin [Source:HGNC Symbol;Acc:HGNC:13200]
ENSSSAG00000080154 N/A N/A N/A
ENSSSAG00000071453 SPINT2 ENSG00000167642 serine peptidase inhibitor, Kunitz type 2 [Source:HGNC Symbol;Acc:HGNC:11247]
ENSSSAG00000002698 VCP ENSG00000165280 valosin containing protein [Source:HGNC Symbol;Acc:HGNC:12666]
ENSSSAG00000076348 GSTO1 ENSG00000148834 glutathione S-transferase omega 1 [Source:HGNC Symbol;Acc:HGNC:13312]
ENSSSAG00000056514 ASAH1 ENSG00000104763 N-acylsphingosine amidohydrolase 1 [Source:HGNC Symbol;Acc:HGNC:735]
ENSSSAG00000015747 PTP4A1 ENSG00000112245 protein tyrosine phosphatase 4A1 [Source:HGNC Symbol;Acc:HGNC:9634]
ENSSSAG00000048640 ANXA2 ENSG00000182718 annexin A2 [Source:HGNC Symbol;Acc:HGNC:537]
ENSSSAG00000000892 CLTC ENSG00000141367 clathrin heavy chain [Source:HGNC Symbol;Acc:HGNC:2092]
ENSSSAG00000041357 TPP1 ENSG00000166340 tripeptidyl peptidase 1 [Source:HGNC Symbol;Acc:HGNC:2073]
ENSSSAG00000075948 TALDO1 ENSG00000177156 transaldolase 1 [Source:HGNC Symbol;Acc:HGNC:11559]
ENSSSAG00000041291 DENND4C ENSG00000137145 DENN domain containing 4C [Source:HGNC Symbol;Acc:HGNC:26079]
Code
GATA3eomesDNAVACCINEivhd_intersection_ora_downreg_4wpc <-
  enrichGO(
    gene = gata3_eomes_dnavaccine_ivhd_intersection_tbl_orthologs$hsapiens_ensembl,
    OrgDb = 'org.Hs.eg.db',
    keyType = 'ENSEMBL',
    ont = 'BP',
    minGSSize = 10,
    maxGSSize = 1000,
    pAdjustMethod = 'BH',
    pvalueCutoff = 0.05,
    readable = T
  )

The 179 salmon ENSEMBL gene IDs from the intersection between GATA3, EOMES, DNA vaccine, and IV-HD were converted to 144 human orthologs.

These 142 genes were used for over-representation analysis Figure 14, and filtered by immune-related term in Figure 15.

Code
as_tibble(GATA3eomesDNAVACCINEivhd_intersection_ora_downreg_4wpc) %>%
  dplyr::slice(1:20) %>%
  mutate(ID = fct_reorder(ID, p.adjust)) %>%  # sorting 'ID' by Adjusted p-value
  ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
  geom_bar(stat = "identity",
           colour = 'black',
           linewidth = 0.3) +
  coord_flip() +
  scale_fill_distiller(palette = 'Reds',
                       name = 'Adjusted \n p-value',
                       guide = guide_colorbar(reverse = TRUE)) +
  scale_y_continuous() +
  guides(fill = guide_colourbar(barwidth = 1, barheight = 7)) +
  labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
  theme_classic() +

  theme(axis.text.y = element_text(
    color = "black",
    size = 14,
    face = "plain"
  )) +
  theme(legend.position = 'right') +
  theme(axis.title = element_text()) +
  theme(
    text = element_text(family = 'serif', size = 14),
    plot.margin = margin(
      t = 5,
      r = 0,
      l = 0,
      b = 0
    ),
    plot.title = element_text(hjust = 0)
  )

Figure 14: Enriched pathways from GATA3-EOMES-DNA vaccine-IVHD intersection downregulated genes at 4 WPC (top 20 pathways by adjusted p-value)
Code
as_tibble(GATA3eomesDNAVACCINEivhd_intersection_ora_downreg_4wpc) %>%
  mutate(ID = fct_reorder(ID, p.adjust)) %>%
  dplyr::slice(1:20) %>%
  dplyr::select(ID, Description) %>%
  map_df(., rev) %>% 
  kable(
    booktabs = TRUE,
        col.names = c(
      'GO Term',
      'Description'
    ),
    align = 'l') %>%
  kableExtra::row_spec(., row = 0, italic = TRUE) %>%
  kableExtra::kable_styling(font_size = 12)
Table 17: GATA3-EOMES-DNA vaccine intersection GO Term translation
GO Term Description
GO:0099024 plasma membrane invagination
GO:0002688 regulation of leukocyte chemotaxis
GO:0030833 regulation of actin filament polymerization
GO:1901292 nucleoside phosphate catabolic process
GO:0002429 immune response-activating cell surface receptor signaling pathway
GO:0006898 receptor-mediated endocytosis
GO:0045807 positive regulation of endocytosis
GO:0030041 actin filament polymerization
GO:0006911 phagocytosis, engulfment
GO:0006909 phagocytosis
GO:0050851 antigen receptor-mediated signaling pathway
GO:0008154 actin polymerization or depolymerization
GO:0043407 negative regulation of MAP kinase activity
GO:0007015 actin filament organization
GO:0030029 actin filament-based process
GO:0030036 actin cytoskeleton organization
GO:0097435 supramolecular fiber organization
GO:0050853 B cell receptor signaling pathway
GO:0031589 cell-substrate adhesion
GO:0006897 endocytosis
Code
downregulated_immune_GATA3eomesDNAVACCINEivhd_4wpc <-
  filter_rows_by_GO_term(GATA3eomesDNAVACCINEivhd_intersection_ora_downreg_4wpc, immune_related_GOterms, 'goterms')

as_tibble(downregulated_immune_GATA3eomesDNAVACCINEivhd_4wpc) %>%
  dplyr::slice(1:10) %>%
  mutate(ID = fct_reorder(ID, p.adjust)) %>%  # sorting 'Description' by Adjusted p-value
  ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
  geom_bar(stat = "identity",
           colour = 'black',
           linewidth = 0.3) +
  coord_flip() +
  scale_fill_distiller(palette = 'Purples',
                       name = 'Adjusted \n p-value',
                       guide = guide_colorbar(reverse = TRUE)) +
  scale_y_continuous() +
  guides(fill = guide_colourbar(barwidth = 1, barheight = 7)) +
  labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
  theme_classic() +

  theme(axis.text.y = element_text(
    color = "black",
    size = 14,
    face = "plain"
  )) +
  theme(legend.position = 'right') +
  theme(axis.title = element_text()) +
  theme(
    text = element_text(family = 'serif', size = 14),
    plot.margin = margin(
      t = 5,
      r = 0,
      l = 0,
      b = 0
    ),
    plot.title = element_text(hjust = 0)
  )

Figure 15: Enriched immune pathways from GATA3-EOMES-DNA vaccine intersection downregulated genes at 4 WPC (top 10 pathways by adjusted p-value)
Code
as_tibble(downregulated_immune_GATA3eomesDNAVACCINEivhd_4wpc) %>%
  mutate(ID = fct_reorder(ID, p.adjust)) %>%
  dplyr::slice(1:10) %>%
  dplyr::select(ID, Description) %>%
  map_df(., rev) %>% 
  kable(
    booktabs = TRUE,
        col.names = c(
      'GO Term',
      'Description'
    ),
    align = 'l') %>%
  kableExtra::row_spec(., row = 0, italic = TRUE) %>%
  kableExtra::kable_styling(font_size = 12)
Table 18: GATA3-EOMES-DNA vaccine intersection (immune) GO Term translation
GO Term Description
GO:0030595 leukocyte chemotaxis
GO:0050854 regulation of antigen receptor-mediated signaling pathway
GO:0002685 regulation of leukocyte migration
GO:0002688 regulation of leukocyte chemotaxis
GO:0002429 immune response-activating cell surface receptor signaling pathway
GO:0006911 phagocytosis, engulfment
GO:0006909 phagocytosis
GO:0050851 antigen receptor-mediated signaling pathway
GO:0050853 B cell receptor signaling pathway
GO:0031589 cell-substrate adhesion

Upregulated

Code
# create list of upregulated geneIDs
a <- list(
  A = significant_res_dnavaccine_vs_conu_4wpc[significant_res_dnavaccine_vs_conu_4wpc$log2FC > 0, ]$ID,
  B = significant_res_eomes_vs_conu_4wpc[significant_res_eomes_vs_conu_4wpc$log2FC > 0, ]$ID,
  C = significant_res_gata3_vs_conu_4wpc[significant_res_gata3_vs_conu_4wpc$log2FC > 0, ]$ID,
  D = significant_res_ivhd_vs_conu_4wpc[significant_res_ivhd_vs_conu_4wpc$log2FC > 0, ]$ID,
  E = significant_res_ivld_vs_conu_4wpc[significant_res_ivld_vs_conu_4wpc$log2FC > 0, ]$ID
)

# add treatment names
names(a) <-
  c('DNA vaccine', 'EOMES', 'GATA3', 'IVHD', 'IVLD')

# check gene counts per treatment
kable((sapply(a, length)), col.names = c('count'))
count
DNA vaccine 685
EOMES 1018
GATA3 1138
IVHD 595
IVLD 248
Code
# remove treatments with low counts from the list
# a$IVLD <- NULL


Code
# plot Venn diagram
display_venn(
  a,
  fill = c('#cdb4db', '#bde0fe', '#ccd5ae', '#d4a373', '#f08080'),
  lwd = 1,
  cex = 1,
  cat.cex = 1,
  cat.fontfamily = 'serif',
  # cat.fontface = 'bold',
  cat.default.pos = 'outer',
  cat.dist = c(0.20, 0.20, 0.22, 0.20,.20),
  cat.pos = c(360, 360, 250, 90, 360))

Figure 16: Upregulated genes, per treatment, at 4 WPC. pTagRFP as reference


From the Venn diagram, I gathered the genes that are common to all treatments, and converted them to h. sapiens orthologs. They are listed on Table 19.

I also did over-representation analysis on the following gene lists:

  • EOMES (255 genes)

  • GATA3 (346 genes)

  • EOMES-GATA3 intersection (174 genes)

  • GATA3-DNA vaccine-IVHD intersection (174 genes)

Over-representation analysis of common UPREGULATED genes at 4 WPC

Code
common_genes_upregulated_4WPC <-
  Reduce(
    intersect,
    list(
      significant_res_dnavaccine_vs_conu_4wpc[significant_res_dnavaccine_vs_conu_4wpc$log2FC > 0, ]$ID,
      significant_res_gata3_vs_conu_4wpc[significant_res_gata3_vs_conu_4wpc$log2FC > 0, ]$ID,
      significant_res_eomes_vs_conu_4wpc[significant_res_eomes_vs_conu_4wpc$log2FC > 0, ]$ID,
      significant_res_ivhd_vs_conu_4wpc[significant_res_ivhd_vs_conu_4wpc$log2FC > 0, ]$ID,
      significant_res_ivld_vs_conu_4wpc[significant_res_ivld_vs_conu_4wpc$log2FC > 0, ]$ID
    )
  )

# length(common_genes_upregulated_4WPC)

common_upregulated_orthologs_4wpc <- gorth(
  query = common_genes_upregulated_4WPC,
  source_organism = 'ssalar',
  target_organism = 'hsapiens',
  mthreshold = 1,
  filter_na = F
)

common_upregulated_orthologs_tbl_4wpc <-
  as_tibble(common_upregulated_orthologs_4wpc) %>% dplyr::select(.,
                                                                   input,
                                                                   ortholog_name,
                                                                   ortholog_ensg,
                                                                   description) %>% dplyr::rename(
                                                                     .,
                                                                     ssalar_ensembl = input,
                                                                     hsapiens_ortholog = ortholog_name,
                                                                     hsapiens_ensembl = ortholog_ensg,
                                                                     description = description
                                                                   )

common_upregulated_orthologs_tbl_4wpc %>% head(., n = 20) %>% 
  kable(
    booktabs = TRUE,
    col.names = c(
      'Salmon ENSEMBL',
      'Human ortholog',
      'Human ENSEMBL',
      'Description'
    ),
    align = 'c',
  ) %>%
  kableExtra::row_spec(., row = 0, italic = TRUE) %>%
  kableExtra::kable_styling(font_size = 12)
Table 19: Common upregulated orthologs at 4 WPC
Salmon ENSEMBL Human ortholog Human ENSEMBL Description
ENSSSAG00000064243 GFI1B ENSG00000165702 growth factor independent 1B transcriptional repressor [Source:HGNC Symbol;Acc:HGNC:4238]
ENSSSAG00000055575 N/A N/A N/A
ENSSSAG00000084117 TNFRSF10A ENSG00000104689 TNF receptor superfamily member 10a [Source:HGNC Symbol;Acc:HGNC:11904]
ENSSSAG00000044296 TAL1 ENSG00000162367 TAL bHLH transcription factor 1, erythroid differentiation factor [Source:HGNC Symbol;Acc:HGNC:11556]
ENSSSAG00000087439 HBG2 ENSG00000196565 hemoglobin subunit gamma 2 [Source:HGNC Symbol;Acc:HGNC:4832]
ENSSSAG00000074325 PPFIA4 ENSG00000143847 PTPRF interacting protein alpha 4 [Source:HGNC Symbol;Acc:HGNC:9248]
ENSSSAG00000057121 ENPP2 ENSG00000136960 ectonucleotide pyrophosphatase/phosphodiesterase 2 [Source:HGNC Symbol;Acc:HGNC:3357]
ENSSSAG00000045065 HBE1 ENSG00000213931 hemoglobin subunit epsilon 1 [Source:HGNC Symbol;Acc:HGNC:4830]
ENSSSAG00000046626 CAT ENSG00000121691 catalase [Source:HGNC Symbol;Acc:HGNC:1516]
ENSSSAG00000000221 AQP1 ENSG00000240583 aquaporin 1 (Colton blood group) [Source:HGNC Symbol;Acc:HGNC:633]
ENSSSAG00000103747 HBE1 ENSG00000213931 hemoglobin subunit epsilon 1 [Source:HGNC Symbol;Acc:HGNC:4830]
ENSSSAG00000044737 HBG2 ENSG00000196565 hemoglobin subunit gamma 2 [Source:HGNC Symbol;Acc:HGNC:4832]
ENSSSAG00000118282 TWF2 ENSG00000247596 twinfilin actin binding protein 2 [Source:HGNC Symbol;Acc:HGNC:9621]
ENSSSAG00000106259 N/A N/A N/A
ENSSSAG00000006088 BNIP3 ENSG00000176171 BCL2 interacting protein 3 [Source:HGNC Symbol;Acc:HGNC:1084]
ENSSSAG00000065233 HBE1 ENSG00000213931 hemoglobin subunit epsilon 1 [Source:HGNC Symbol;Acc:HGNC:4830]
ENSSSAG00000105188 SOX4 ENSG00000124766 SRY-box transcription factor 4 [Source:HGNC Symbol;Acc:HGNC:11200]
ENSSSAG00000088104 DBP ENSG00000105516 D-box binding PAR bZIP transcription factor [Source:HGNC Symbol;Acc:HGNC:2697]
ENSSSAG00000089742 N/A N/A N/A
ENSSSAG00000105062 IGFBP3 ENSG00000146674 insulin like growth factor binding protein 3 [Source:HGNC Symbol;Acc:HGNC:5472]

From an input of 168 salmon ENSEMBL gene IDs, 136 genes were converted to human orthologs.

Code
common_genes_ora_downreg_4wpc <-
  enrichGO(
    gene = common_upregulated_orthologs_tbl_4wpc$hsapiens_ensembl,
    OrgDb = 'org.Hs.eg.db',
    keyType = 'ENSEMBL',
    ont = 'BP',
    minGSSize = 10,
    maxGSSize = 1000,
    pAdjustMethod = 'BH',
    pvalueCutoff = 0.05,
    readable = T
  )
Code
as_tibble(common_genes_ora_downreg_4wpc) %>%
  dplyr::slice(1:20) %>%
  mutate(ID = fct_reorder(ID, p.adjust)) %>%  # sorting 'Description' by Adjusted p-value
  ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
  geom_bar(stat = "identity",
           colour = 'black',
           linewidth = 0.3,
           ) +
  coord_flip() +
  scale_fill_distiller(palette = 'Reds',
                       name = 'Adjusted \n p-value',
                       guide = guide_colorbar(reverse = TRUE)) +
  scale_y_continuous() +
  guides(fill = guide_colourbar(barwidth = 1, barheight = 7)) +
  labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
  theme_classic() +

  theme(axis.text.y = element_text(
    color = "black",
    size = 14,
    face = "plain"
  )) +
  theme(legend.position = 'right') +
  theme(axis.title = element_text()) +
  theme(
    text = element_text(family = 'serif', size = 14),
    plot.margin = margin(
      t = 5,
      r = 0,
      l = 0,
      b = 0
    ),
    plot.title = element_text(hjust = 0)
  )

Figure 17: Enriched pathways from common upregulated genes at 4 WPC (top 20 pathways by adjusted p-value)
Code
as_tibble(common_genes_ora_downreg_4wpc) %>%
  mutate(ID = fct_reorder(ID, p.adjust)) %>%
  dplyr::slice(1:20) %>%
  dplyr::select(ID, Description) %>%
  map_df(., rev) %>% 
  kable(
    booktabs = TRUE,
        col.names = c(
      'GO Term',
      'Description'
    ),
    align = 'l') %>%
  kableExtra::row_spec(., row = 0, italic = TRUE) %>%
  kableExtra::kable_styling(font_size = 12)
Table 20: Common upregulated genes GO Term translation
GO Term Description
GO:0009142 nucleoside triphosphate biosynthetic process
GO:0072522 purine-containing compound biosynthetic process
GO:0009201 ribonucleoside triphosphate biosynthetic process
GO:0006164 purine nucleotide biosynthetic process
GO:0072521 purine-containing compound metabolic process
GO:1901293 nucleoside phosphate biosynthetic process
GO:0009165 nucleotide biosynthetic process
GO:0006163 purine nucleotide metabolic process
GO:0006753 nucleoside phosphate metabolic process
GO:0009117 nucleotide metabolic process
GO:0009152 purine ribonucleotide biosynthetic process
GO:0006091 generation of precursor metabolites and energy
GO:0061061 muscle structure development
GO:0055086 nucleobase-containing small molecule metabolic process
GO:0046390 ribose phosphate biosynthetic process
GO:0009260 ribonucleotide biosynthetic process
GO:0006119 oxidative phosphorylation
GO:0015980 energy derivation by oxidation of organic compounds
GO:0045333 cellular respiration
GO:0009060 aerobic respiration

The upregulated genes common to all treatments enriched a total of 186 pathways, but only the top 20 by adjusted p-value are displayed in Figure 17.

Over-representation analysis of GATA3 exclusive UPREGULATED genes at 4 WPC

Code
dnavaccine_4wpc_up <-
  significant_res_dnavaccine_vs_conu_4wpc[significant_res_dnavaccine_vs_conu_4wpc$log2FC > 0,]$ID
eomes_4wpc_up <-
  significant_res_eomes_vs_conu_4wpc[significant_res_eomes_vs_conu_4wpc$log2FC > 0,]$ID
gata3_4wpc_up <-
  significant_res_gata3_vs_conu_4wpc[significant_res_gata3_vs_conu_4wpc$log2FC > 0,]$ID
ivhd_4wpc_up <-
  significant_res_ivhd_vs_conu_4wpc[significant_res_ivhd_vs_conu_4wpc$log2FC > 0,]$ID
ivld_4wpc_up <-
  significant_res_ivld_vs_conu_4wpc[significant_res_ivld_vs_conu_4wpc$log2FC > 0,]$ID


gata3_exclusive_up_genes_4wpc <-
  setdiff(gata3_4wpc_up,
          c(dnavaccine_4wpc_up, eomes_4wpc_up, ivhd_4wpc_up, ivld_4wpc_up))

# length(gata3_exclusive_up_genes_4wpc)

gata3_upregulated_orthologs_4wpc <- gorth(
  query = gata3_exclusive_up_genes_4wpc,
  source_organism = 'ssalar',
  target_organism = 'hsapiens',
  mthreshold = 1,
  filter_na = F
)

gata3_upregulated_orthologs_tbl_4wpc <-
  as_tibble(gata3_upregulated_orthologs_4wpc) %>% dplyr::select(.,
                                                                   input,
                                                                   ortholog_name,
                                                                   ortholog_ensg,
                                                                   description) %>% dplyr::rename(
                                                                     .,
                                                                     ssalar_ensembl = input,
                                                                     hsapiens_ortholog = ortholog_name,
                                                                     hsapiens_ensembl = ortholog_ensg,
                                                                     description = description
                                                                   )

gata3_upregulated_orthologs_tbl_4wpc %>% head(., n = 20) %>%
  kable(
    booktabs = TRUE,
    col.names = c(
      'Salmon ENSEMBL',
      'Human ortholog',
      'Human ENSEMBL',
      'Description'
    ),
    align = 'c') %>%
  kableExtra::row_spec(., row = 0, italic = TRUE) %>%
  kableExtra::kable_styling(font_size = 12)
Table 21: GATA3 exclusive upregulated orthologs at 4 WPC, first 20 rows
Salmon ENSEMBL Human ortholog Human ENSEMBL Description
ENSSSAG00000114656 COL10A1 ENSG00000123500 collagen type X alpha 1 chain [Source:HGNC Symbol;Acc:HGNC:2185]
ENSSSAG00000105668 KLF17 ENSG00000171872 KLF transcription factor 17 [Source:HGNC Symbol;Acc:HGNC:18830]
ENSSSAG00000047878 PAPLN ENSG00000100767 papilin, proteoglycan like sulfated glycoprotein [Source:HGNC Symbol;Acc:HGNC:19262]
ENSSSAG00000044874 PRSS16 ENSG00000112812 serine protease 16 [Source:HGNC Symbol;Acc:HGNC:9480]
ENSSSAG00000114639 N/A N/A N/A
ENSSSAG00000107243 ABCG4 ENSG00000172350 ATP binding cassette subfamily G member 4 [Source:HGNC Symbol;Acc:HGNC:13884]
ENSSSAG00000084306 CNGA2 ENSG00000183862 cyclic nucleotide gated channel subunit alpha 2 [Source:HGNC Symbol;Acc:HGNC:2149]
ENSSSAG00000109302 FAM174B ENSG00000185442 family with sequence similarity 174 member B [Source:HGNC Symbol;Acc:HGNC:34339]
ENSSSAG00000064220 PADI2 ENSG00000117115 peptidyl arginine deiminase 2 [Source:HGNC Symbol;Acc:HGNC:18341]
ENSSSAG00000106865 ART4 ENSG00000111339 ADP-ribosyltransferase 4 (inactive) (Dombrock blood group) [Source:HGNC Symbol;Acc:HGNC:726]
ENSSSAG00000084240 TAL1 ENSG00000162367 TAL bHLH transcription factor 1, erythroid differentiation factor [Source:HGNC Symbol;Acc:HGNC:11556]
ENSSSAG00000089308 N/A N/A N/A
ENSSSAG00000015164 NRP1 ENSG00000099250 neuropilin 1 [Source:HGNC Symbol;Acc:HGNC:8004]
ENSSSAG00000080590 KLF17 ENSG00000171872 KLF transcription factor 17 [Source:HGNC Symbol;Acc:HGNC:18830]
ENSSSAG00000105530 N/A N/A N/A
ENSSSAG00000006070 SEPTIN3 ENSG00000100167 septin 3 [Source:HGNC Symbol;Acc:HGNC:10750]
ENSSSAG00000108653 UFSP1 ENSG00000176125 UFM1 specific peptidase 1 (inactive) [Source:HGNC Symbol;Acc:HGNC:33821]
ENSSSAG00000007672 RTKN ENSG00000114993 rhotekin [Source:HGNC Symbol;Acc:HGNC:10466]
ENSSSAG00000005588 RASSF9 ENSG00000198774 Ras association domain family member 9 [Source:HGNC Symbol;Acc:HGNC:15739]
ENSSSAG00000086990 N/A N/A N/A

349 genes were upregulated exclusively in GATA3 and 258 were converted to human orthologs.

These 256 genes were used for over-representation analysis Figure 18. Only 4 pathways were enriched in these upregulated genes. None of them was immune related.

Code
gata3_ora_upreg_4wpc <-
  enrichGO(
    gene = gata3_upregulated_orthologs_tbl_4wpc$hsapiens_ensembl,
    OrgDb = 'org.Hs.eg.db',
    keyType = 'ENSEMBL',
    ont = 'BP',
    minGSSize = 10,
    maxGSSize = 1000,
    pAdjustMethod = 'BH',
    pvalueCutoff = 0.05,
    readable = T
  )
Code
as_tibble(gata3_ora_upreg_4wpc) %>%
  dplyr::slice(1:20) %>%
  mutate(Description = fct_reorder(Description, p.adjust)) %>%  # sorting 'Description' by Adjusted p-value
  ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
  geom_bar(stat = "identity",
           colour = 'black',
           linewidth = 0.3,
           ) +
  coord_flip() +
  scale_fill_distiller(palette = 'Reds',
                       name = 'Adjusted \n p-value',
                       guide = guide_colorbar(reverse = TRUE)) +
  scale_y_continuous() +
  guides(fill = guide_colourbar(barwidth = 1, barheight = 7)) +
  labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
  theme_classic() +

  theme(axis.text.y = element_text(
    color = "black",
    size = 14,
    face = "plain"
  )) +
  theme(legend.position = 'right') +
  theme(axis.title = element_text()) +
  theme(
    text = element_text(family = 'serif', size = 14),
    plot.margin = margin(
      t = 5,
      r = 0,
      l = 0,
      b = 0
    ),
    plot.title = element_text(hjust = 0)
  )

Figure 18: Enriched pathways from GATA3 exclusive upregulated genes at 4 WPC (top 20 pathways by adjusted p-value)
Code
as_tibble(gata3_ora_upreg_4wpc) %>%
  mutate(ID = fct_reorder(ID, p.adjust)) %>%
  dplyr::slice(1:20) %>%
  dplyr::select(ID, Description) %>%
  map_df(., rev) %>% 
  kable(
    booktabs = TRUE,
        col.names = c(
      'GO Term',
      'Description'
    ),
    align = 'l') %>%
  kableExtra::row_spec(., row = 0, italic = TRUE) %>%
  kableExtra::kable_styling(font_size = 12)
Table 22: GATA3 exclusive genes GO Term translation
GO Term Description
GO:0043043 peptide biosynthetic process
GO:0006412 translation
GO:0002181 cytoplasmic translation

Over-representation analysis of EOMES exclusive UPREGULATED genes at 4 WPC

Code
eomes_exclusive_up_genes_4wpc <-
  setdiff(eomes_4wpc_up,
          c(dnavaccine_4wpc_up, gata3_4wpc_up, ivhd_4wpc_up, ivld_4wpc_up))

# length(eomes_exclusive_up_genes_4wpc)

eomes_upregulated_orthologs_4wpc <- gorth(
  query = eomes_exclusive_up_genes_4wpc,
  source_organism = 'ssalar',
  target_organism = 'hsapiens',
  mthreshold = 1,
  filter_na = F
)

eomes_upregulated_orthologs_tbl_4wpc <-
  as_tibble(eomes_upregulated_orthologs_4wpc) %>% dplyr::select(.,
                                                                   input,
                                                                   ortholog_name,
                                                                   ortholog_ensg,
                                                                   description) %>% dplyr::rename(
                                                                     .,
                                                                     ssalar_ensembl = input,
                                                                     hsapiens_ortholog = ortholog_name,
                                                                     hsapiens_ensembl = ortholog_ensg,
                                                                     description = description
                                                                   )

eomes_upregulated_orthologs_tbl_4wpc %>% head(., n = 20) %>%
  kable(
    booktabs = TRUE,
    col.names = c(
      'Salmon ENSEMBL',
      'Human ortholog',
      'Human ENSEMBL',
      'Description'
    ),
    align = 'c') %>%
  kableExtra::row_spec(., row = 0, italic = TRUE) %>%
  kableExtra::kable_styling(font_size = 12)
Table 23: EOMES exclusive upregulated orthologs at 4 WPC, first 20 rows
Salmon ENSEMBL Human ortholog Human ENSEMBL Description
ENSSSAG00000044078 PAPPA2 ENSG00000116183 pappalysin 2 [Source:HGNC Symbol;Acc:HGNC:14615]
ENSSSAG00000116632 GFRA2 ENSG00000168546 GDNF family receptor alpha 2 [Source:HGNC Symbol;Acc:HGNC:4244]
ENSSSAG00000093513 N/A N/A N/A
ENSSSAG00000056038 GLRA1 ENSG00000145888 glycine receptor alpha 1 [Source:HGNC Symbol;Acc:HGNC:4326]
ENSSSAG00000078714 OSBPL1A ENSG00000141447 oxysterol binding protein like 1A [Source:HGNC Symbol;Acc:HGNC:16398]
ENSSSAG00000101638 N/A N/A N/A
ENSSSAG00000115817 N/A N/A N/A
ENSSSAG00000085775 N/A N/A N/A
ENSSSAG00000096367 N/A N/A N/A
ENSSSAG00000100368 N/A N/A N/A
ENSSSAG00000075250 N/A N/A N/A
ENSSSAG00000002605 XPNPEP3 ENSG00000196236 X-prolyl aminopeptidase 3 [Source:HGNC Symbol;Acc:HGNC:28052]
ENSSSAG00000088168 N/A N/A N/A
ENSSSAG00000042950 FAM20A ENSG00000108950 FAM20A golgi associated secretory pathway pseudokinase [Source:HGNC Symbol;Acc:HGNC:23015]
ENSSSAG00000105856 N/A N/A N/A
ENSSSAG00000068404 PI4KB ENSG00000143393 phosphatidylinositol 4-kinase beta [Source:HGNC Symbol;Acc:HGNC:8984]
ENSSSAG00000006897 SLC45A2 ENSG00000164175 solute carrier family 45 member 2 [Source:HGNC Symbol;Acc:HGNC:16472]
ENSSSAG00000000688 PITPNC1 ENSG00000154217 phosphatidylinositol transfer protein cytoplasmic 1 [Source:HGNC Symbol;Acc:HGNC:21045]
ENSSSAG00000070217 MYH7 ENSG00000092054 myosin heavy chain 7 [Source:HGNC Symbol;Acc:HGNC:7577]
ENSSSAG00000038530 N/A N/A N/A
Code
eomes_ora_upreg_4wpc <-
  enrichGO(
    gene = eomes_upregulated_orthologs_tbl_4wpc$hsapiens_ensembl,
    OrgDb = 'org.Hs.eg.db',
    keyType = 'ENSEMBL',
    ont = 'BP',
    minGSSize = 10,
    maxGSSize = 1000,
    pAdjustMethod = 'BH',
    pvalueCutoff = 0.05,
    readable = T
  )
Code
as_tibble(eomes_ora_upreg_4wpc) %>%
  dplyr::slice(1:20) %>%
  mutate(ID = fct_reorder(ID, p.adjust)) %>%  # sorting 'ID' by Adjusted p-value
  ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
  geom_bar(stat = "identity",
           colour = 'black',
           linewidth = 0.3,
           ) +
  coord_flip() +
  scale_fill_distiller(palette = 'Reds',
                       name = 'Adjusted \n p-value',
                       guide = guide_colorbar(reverse = TRUE)) +
  scale_y_continuous() +
  guides(fill = guide_colourbar(barwidth = 1, barheight = 7)) +
  labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
  theme_classic() +

  theme(axis.text.y = element_text(
    color = "black",
    size = 14,
    face = "plain"
  )) +
  theme(legend.position = 'right') +
  theme(axis.title = element_text()) +
  theme(
    text = element_text(family = 'serif', size = 14),
    plot.margin = margin(
      t = 5,
      r = 0,
      l = 0,
      b = 0
    ),
    plot.title = element_text(hjust = 0)
  )

Out of 255 (upregulated) genes used in over-representation analysis, 0 pathways were over-represented.

Over-representation analysis of EOMES-GATA3 intersection UPREGULATED genes at 4 WPC

Code
intersectionGE <- intersect(gata3_4wpc_up, eomes_4wpc_up)

# length(intersection2)

gata3EOMES_exclusive_genes_up <- setdiff(intersectionGE,
                                           c(dnavaccine_4wpc_up, ivhd_4wpc_up, ivld_4wpc_up))

# length(gata3EOMES_exclusive_genes_up)

gata3EOMES_upregulated_orthologs_4wpc <- gorth(
  query = gata3EOMES_exclusive_genes_up,
  source_organism = 'ssalar',
  target_organism = 'hsapiens',
  mthreshold = 1,
  filter_na = F
)

gata3EOMES_upregulated_orthologs_tbl_4wpc <-
  as_tibble(gata3EOMES_upregulated_orthologs_4wpc) %>% dplyr::select(.,
                                                                   input,
                                                                   ortholog_name,
                                                                   ortholog_ensg,
                                                                   description) %>% dplyr::rename(
                                                                     .,
                                                                     ssalar_ensembl = input,
                                                                     hsapiens_ortholog = ortholog_name,
                                                                     hsapiens_ensembl = ortholog_ensg,
                                                                     description = description
                                                                   )

gata3EOMES_upregulated_orthologs_tbl_4wpc %>% head(., n = 20) %>%
  kable(
    booktabs = TRUE,
    col.names = c(
      'Salmon ENSEMBL',
      'Human ortholog',
      'Human ENSEMBL',
      'Description'
    ),
    align = 'c') %>%
  kableExtra::row_spec(., row = 0, italic = TRUE) %>%
  kableExtra::kable_styling(font_size = 12)
Table 24: EOMES-GATA3 intersection upregulated orthologs at 4 WPC, first 20 rows
Salmon ENSEMBL Human ortholog Human ENSEMBL Description
ENSSSAG00000045171 IGFBP7 ENSG00000163453 insulin like growth factor binding protein 7 [Source:HGNC Symbol;Acc:HGNC:5476]
ENSSSAG00000010340 COL6A2 ENSG00000142173 collagen type VI alpha 2 chain [Source:HGNC Symbol;Acc:HGNC:2212]
ENSSSAG00000085630 N/A N/A N/A
ENSSSAG00000079527 NIBAN2 ENSG00000136830 niban apoptosis regulator 2 [Source:HGNC Symbol;Acc:HGNC:25282]
ENSSSAG00000121679 N/A N/A N/A
ENSSSAG00000070347 MOBP ENSG00000168314 myelin associated oligodendrocyte basic protein [Source:HGNC Symbol;Acc:HGNC:7189]
ENSSSAG00000086945 PPDPF ENSG00000125534 pancreatic progenitor cell differentiation and proliferation factor [Source:HGNC Symbol;Acc:HGNC:16142]
ENSSSAG00000068629 NMT1 ENSG00000136448 N-myristoyltransferase 1 [Source:HGNC Symbol;Acc:HGNC:7857]
ENSSSAG00000056040 RHAG ENSG00000112077 Rh associated glycoprotein [Source:HGNC Symbol;Acc:HGNC:10006]
ENSSSAG00000089701 N/A N/A N/A
ENSSSAG00000086795 FBXO32 ENSG00000156804 F-box protein 32 [Source:HGNC Symbol;Acc:HGNC:16731]
ENSSSAG00000100606 N/A N/A N/A
ENSSSAG00000094473 N/A N/A N/A
ENSSSAG00000094946 TSSC4 ENSG00000184281 tumor suppressing subtransferable candidate 4 [Source:HGNC Symbol;Acc:HGNC:12386]
ENSSSAG00000047323 MYBPC1 ENSG00000196091 myosin binding protein C1 [Source:HGNC Symbol;Acc:HGNC:7549]
ENSSSAG00000089276 RGCC ENSG00000102760 regulator of cell cycle [Source:HGNC Symbol;Acc:HGNC:20369]
ENSSSAG00000039832 IRS2 ENSG00000185950 insulin receptor substrate 2 [Source:HGNC Symbol;Acc:HGNC:6126]
ENSSSAG00000096709 HDAC9 ENSG00000048052 histone deacetylase 9 [Source:HGNC Symbol;Acc:HGNC:14065]
ENSSSAG00000008223 FNDC1 ENSG00000164694 fibronectin type III domain containing 1 [Source:HGNC Symbol;Acc:HGNC:21184]
ENSSSAG00000042825 JAK2 ENSG00000096968 Janus kinase 2 [Source:HGNC Symbol;Acc:HGNC:6192]
Code
gata3EOMES_ora_upreg_4wpc <-
  enrichGO(
    gene = gata3EOMES_upregulated_orthologs_tbl_4wpc$hsapiens_ensembl,
    OrgDb = 'org.Hs.eg.db',
    keyType = 'ENSEMBL',
    ont = 'BP',
    minGSSize = 10,
    maxGSSize = 1000,
    pAdjustMethod = 'BH',
    pvalueCutoff = 0.05,
    readable = T
  )
Code
as_tibble(gata3EOMES_ora_upreg_4wpc) %>%
  dplyr::slice(1:20) %>%
  mutate(ID = fct_reorder(ID, p.adjust)) %>%  # sorting 'ID' by Adjusted p-value
  ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
  geom_bar(stat = "identity",
           colour = 'black',
           linewidth = 0.3,
           ) +
  coord_flip() +
  scale_fill_distiller(palette = 'Reds',
                       name = 'Adjusted \n p-value',
                       guide = guide_colorbar(reverse = TRUE)) +
  scale_y_continuous() +
  guides(fill = guide_colourbar(barwidth = 1, barheight = 7)) +
  labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
  theme_classic() +

  theme(axis.text.y = element_text(
    color = "black",
    size = 14,
    face = "plain"
  )) +
  theme(legend.position = 'right') +
  theme(axis.title = element_text()) +
  theme(
    text = element_text(family = 'serif', size = 14),
    plot.margin = margin(
      t = 5,
      r = 0,
      l = 0,
      b = 0
    ),
    plot.title = element_text(hjust = 0)
  )

Figure 19: Enriched pathways from EOMES-GATA3 intersection upregulated genes at 4 WPC
Code
as_tibble(gata3EOMES_ora_upreg_4wpc) %>%
  mutate(ID = fct_reorder(ID, p.adjust)) %>%
  dplyr::slice(1:20) %>%
  dplyr::select(ID, Description) %>%
  map_df(., rev) %>% 
  kable(
    booktabs = TRUE,
        col.names = c(
      'GO Term',
      'Description'
    ),
    align = 'l') %>%
  kableExtra::row_spec(., row = 0, italic = TRUE) %>%
  kableExtra::kable_styling(font_size = 12)
Table 25: EOMES-GATA3 intersection genes GO Term translation
GO Term Description
GO:0010927 cellular component assembly involved in morphogenesis
GO:0055002 striated muscle cell development
GO:0030239 myofibril assembly
GO:0022613 ribonucleoprotein complex biogenesis
GO:0051100 negative regulation of binding
GO:0055001 muscle cell development
GO:0042254 ribosome biogenesis
GO:0045214 sarcomere organization
GO:0051098 regulation of binding
GO:0051101 regulation of DNA binding

The 171 orthologs upregulated in the EOMES-GATA3 intersection were over-represented in only 10 pathways, none of them immune-related.

Over-representation analysis of GATA3-EOMES-DNA vaccine-IVHD intersection UPREGULATED genes at 4 WPC

Code
intersectionGEDI_up <- Reduce(intersect, list(gata3_4wpc_up, eomes_4wpc_up, dnavaccine_4wpc_up, ivhd_4wpc_up))
gata3_eomes_dnavaccine_ivhd_intersection <- setdiff(intersectionGEDI, ivld_4wpc_up)


gata3_eomes_dnavaccine_ivhd_intersection_orthologs <- gorth(
  query = gata3_eomes_dnavaccine_ivhd_intersection,
  source_organism = 'ssalar',
  target_organism = 'hsapiens',
  mthreshold = 1,
  filter_na = F
)

gata3_eomes_dnavaccine_ivhd_intersection_tbl_orthologs <-
  as_tibble(gata3_eomes_dnavaccine_ivhd_intersection_orthologs) %>% dplyr::select(.,
                                                              input,
                                                              ortholog_name,
                                                              ortholog_ensg,
                                                              description) %>% dplyr::rename(
                                                                .,
                                                                ssalar_ensembl = input,
                                                                hsapiens_ortholog = ortholog_name,
                                                                hsapiens_ensembl = ortholog_ensg,
                                                                description = description
                                                                )


gata3_eomes_dnavaccine_ivhd_intersection_tbl_orthologs %>% head(., n = 20) %>%
  kable(
    booktabs = TRUE,
    col.names = c(
      'Salmon ENSEMBL',
      'Human ortholog',
      'Human ENSEMBL',
      'Description'
    ),
    align = 'c') %>%
  kableExtra::row_spec(., row = 0, italic = TRUE) %>%
  kableExtra::kable_styling(font_size = 12)
Table 26: GATA3-EOMES-DNA vaccine-IVHD intersection upregulated orthologs at 4WPC (first 20 rows)
Salmon ENSEMBL Human ortholog Human ENSEMBL Description
ENSSSAG00000051579 N/A N/A N/A
ENSSSAG00000002480 HSP90AB1 ENSG00000096384 heat shock protein 90 alpha family class B member 1 [Source:HGNC Symbol;Acc:HGNC:5258]
ENSSSAG00000008058 CHMP1B ENSG00000255112 charged multivesicular body protein 1B [Source:HGNC Symbol;Acc:HGNC:24287]
ENSSSAG00000045085 CRLF3 ENSG00000176390 cytokine receptor like factor 3 [Source:HGNC Symbol;Acc:HGNC:17177]
ENSSSAG00000042725 ACTR1A ENSG00000138107 actin related protein 1A [Source:HGNC Symbol;Acc:HGNC:167]
ENSSSAG00000004180 SAT1 ENSG00000130066 spermidine/spermine N1-acetyltransferase 1 [Source:HGNC Symbol;Acc:HGNC:10540]
ENSSSAG00000002265 COPB2 ENSG00000184432 COPI coat complex subunit beta 2 [Source:HGNC Symbol;Acc:HGNC:2232]
ENSSSAG00000043619 EIF3J ENSG00000104131 eukaryotic translation initiation factor 3 subunit J [Source:HGNC Symbol;Acc:HGNC:3270]
ENSSSAG00000003377 RAB5C ENSG00000108774 RAB5C, member RAS oncogene family [Source:HGNC Symbol;Acc:HGNC:9785]
ENSSSAG00000078656 PDCD6 ENSG00000249915 programmed cell death 6 [Source:HGNC Symbol;Acc:HGNC:8765]
ENSSSAG00000041828 JUP ENSG00000173801 junction plakoglobin [Source:HGNC Symbol;Acc:HGNC:6207]
ENSSSAG00000102439 ZYX ENSG00000159840 zyxin [Source:HGNC Symbol;Acc:HGNC:13200]
ENSSSAG00000063331 TLN1 ENSG00000137076 talin 1 [Source:HGNC Symbol;Acc:HGNC:11845]
ENSSSAG00000043967 CD63 ENSG00000135404 CD63 molecule [Source:HGNC Symbol;Acc:HGNC:1692]
ENSSSAG00000080154 N/A N/A N/A
ENSSSAG00000071453 SPINT2 ENSG00000167642 serine peptidase inhibitor, Kunitz type 2 [Source:HGNC Symbol;Acc:HGNC:11247]
ENSSSAG00000005722 MLKL ENSG00000168404 mixed lineage kinase domain like pseudokinase [Source:HGNC Symbol;Acc:HGNC:26617]
ENSSSAG00000002698 VCP ENSG00000165280 valosin containing protein [Source:HGNC Symbol;Acc:HGNC:12666]
ENSSSAG00000046480 PTP4A2 ENSG00000184007 protein tyrosine phosphatase 4A2 [Source:HGNC Symbol;Acc:HGNC:9635]
ENSSSAG00000076348 GSTO1 ENSG00000148834 glutathione S-transferase omega 1 [Source:HGNC Symbol;Acc:HGNC:13312]
Code
GATA3eomesDNAVACCINEivhd_intersection_ora_upreg_4wpc <-
  enrichGO(
    gene = gata3_eomes_dnavaccine_ivhd_intersection_tbl_orthologs$hsapiens_ensembl,
    OrgDb = 'org.Hs.eg.db',
    keyType = 'ENSEMBL',
    ont = 'BP',
    minGSSize = 10,
    maxGSSize = 1000,
    pAdjustMethod = 'BH',
    pvalueCutoff = 0.05,
    readable = T
  )

The 334 salmon ENSEMBL gene IDs from the intersection between GATA3, EOMES, DNA vaccine, and IV-HD were converted to 268 human orthologs. These 268 genes were used for over-representation analysis Figure 20, and filtered by immune-related term in Figure 21.

Code
as_tibble(GATA3eomesDNAVACCINEivhd_intersection_ora_upreg_4wpc) %>%
  dplyr::slice(1:20) %>%
  mutate(ID = fct_reorder(ID, p.adjust)) %>%  # sorting 'ID' by Adjusted p-value
  ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
  geom_bar(stat = "identity",
           colour = 'black',
           linewidth = 0.3) +
  coord_flip() +
  scale_fill_distiller(palette = 'Reds',
                       name = 'Adjusted \n p-value',
                       guide = guide_colorbar(reverse = TRUE)) +
  scale_y_continuous() +
  guides(fill = guide_colourbar(barwidth = 1, barheight = 7)) +
  labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
  theme_classic() +

  theme(axis.text.y = element_text(
    color = "black",
    size = 14,
    face = "plain"
  )) +
  theme(legend.position = 'right') +
  theme(axis.title = element_text()) +
  theme(
    text = element_text(family = 'serif', size = 14),
    plot.margin = margin(
      t = 5,
      r = 0,
      l = 0,
      b = 0
    ),
    plot.title = element_text(hjust = 0)
  )

Figure 20: Enriched pathways from GATA3-EOMES-DNA vaccine-IVHD intersection upregulated genes at 4 WPC (top 20 pathways by adjusted p-value)
Code
as_tibble(GATA3eomesDNAVACCINEivhd_intersection_ora_upreg_4wpc) %>%
  mutate(ID = fct_reorder(ID, p.adjust)) %>%
  dplyr::slice(1:20) %>%
  dplyr::select(ID, Description) %>%
  map_df(., rev) %>% 
  kable(
    booktabs = TRUE,
        col.names = c(
      'GO Term',
      'Description'
    ),
    align = 'l') %>%
  kableExtra::row_spec(., row = 0, italic = TRUE) %>%
  kableExtra::kable_styling(font_size = 12)
Table 27: GATA3-EOMES-DNA-IVHD vaccine intersection upregulated GO Term translation
GO Term Description
GO:0007229 integrin-mediated signaling pathway
GO:0042110 T cell activation
GO:0060627 regulation of vesicle-mediated transport
GO:0002685 regulation of leukocyte migration
GO:0042330 taxis
GO:0060326 cell chemotaxis
GO:0006935 chemotaxis
GO:0033627 cell adhesion mediated by integrin
GO:0097435 supramolecular fiber organization
GO:0030595 leukocyte chemotaxis
GO:0050853 B cell receptor signaling pathway
GO:0045807 positive regulation of endocytosis
GO:0031589 cell-substrate adhesion
GO:0030100 regulation of endocytosis
GO:0002688 regulation of leukocyte chemotaxis
GO:0007015 actin filament organization
GO:0030029 actin filament-based process
GO:0006909 phagocytosis
GO:0030036 actin cytoskeleton organization
GO:0006897 endocytosis
Code
upregulated_immune_GATA3eomesDNAVACCINEivhd_4wpc <-
  filter_rows_by_GO_term(GATA3eomesDNAVACCINEivhd_intersection_ora_upreg_4wpc, immune_related_GOterms, 'goterms')

as_tibble(upregulated_immune_GATA3eomesDNAVACCINEivhd_4wpc) %>%
  dplyr::slice(1:10) %>%
  mutate(ID = fct_reorder(ID, p.adjust)) %>%  # sorting 'ID' by Adjusted p-value
  ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
  geom_bar(stat = "identity",
           colour = 'black',
           linewidth = 0.3) +
  coord_flip() +
  scale_fill_distiller(palette = 'Purples',
                       name = 'Adjusted \n p-value',
                       guide = guide_colorbar(reverse = TRUE)) +
  scale_y_continuous() +
  guides(fill = guide_colourbar(barwidth = 1, barheight = 7)) +
  labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
  theme_classic() +

  theme(axis.text.y = element_text(
    color = "black",
    size = 14,
    face = "plain"
  )) +
  theme(legend.position = 'right') +
  theme(axis.title = element_text()) +
  theme(
    text = element_text(family = 'serif', size = 14),
    plot.margin = margin(
      t = 5,
      r = 0,
      l = 0,
      b = 0
    ),
    plot.title = element_text(hjust = 0)
  )

Figure 21: Enriched immune pathways from GATA3-EOMES-DNA vaccine-IVHD intersection upregulated genes at 4 WPC (top 10 pathways by adjusted p-value)
Code
as_tibble(upregulated_immune_GATA3eomesDNAVACCINEivhd_4wpc) %>%
  mutate(ID = fct_reorder(ID, p.adjust)) %>%
  dplyr::slice(1:10) %>%
  dplyr::select(ID, Description) %>%
  map_df(., rev) %>% 
  kable(
    booktabs = TRUE,
        col.names = c(
      'GO Term',
      'Description'
    ),
    align = 'l') %>%
  kableExtra::row_spec(., row = 0, italic = TRUE) %>%
  kableExtra::kable_styling(font_size = 12)
Table 28: GATA3-EOMES-DNA vaccine-IVHD intersection (immune) GO Term translation
GO Term Description
GO:0042110 T cell activation
GO:0002685 regulation of leukocyte migration
GO:0042330 taxis
GO:0060326 cell chemotaxis
GO:0006935 chemotaxis
GO:0030595 leukocyte chemotaxis
GO:0050853 B cell receptor signaling pathway
GO:0031589 cell-substrate adhesion
GO:0002688 regulation of leukocyte chemotaxis
GO:0006909 phagocytosis

The number of enriched (over-represented) pathways was 631, of which 196 were immune-related pathways.

Gene Set Enrichment Analysis of commonly regulated genes

Besides running over-representation analysis, I also tried to do Gene Set Enrichment Analysis (GSEA). While ORA is based on the overlap of gene sets, GSEA considers the distribution of gene expression changes in a dataset. In this case, I started from genes that were commonly expressed in all treatments in relation to CONU. The sum of the center of both Venn diagrams, since GSEA uses down- and upregulated at the same time, as input. These were converted to orthologs, and because salmon ENSEMBL ID to human ortholog conversion yields repeated gene symbols, I had to select one gene ‘version’ per treatment (the one with the lowest adjusted p-value)

Code
# Loop through each treatment and apply the function, also adding a column with treatment information

# Vertically merge data frames from all treatments and remove NA
merged_df <- do.call(rbind, result_list) %>% na.omit()

# Reduce and intersect the ID column in result_list to find common differentially regulated genes among all treatments
reduced_intersected_ids <-
  Reduce(intersect, lapply(result_list, function(df)
    df$ID))

# Find the common IDs between reduced_intersected_ids and merged_df. This way we are keeping only the common regulated salmon genes
common_ids <- intersect(merged_df$ID, reduced_intersected_ids)

# Subset merged_df to keep only rows with common IDs
# merged_subset <- merged_df[merged_df$ID %in% common_ids,]
merged_subset <-
  subset(merged_df, ID %in% common_ids)  # tidy version

# Splitting between up and downregulated genes, and sorting by gene name and adjusted p-value
upregulated_gsea <-
  merged_subset %>% dplyr::filter(log2FC > 0) %>% arrange(ortholog_name, adjusted_p.val)

downregulated_gsea <-
  merged_subset %>% dplyr::filter(log2FC < 0) %>% arrange(ortholog_name, adjusted_p.val)


# Keeping just one of the ortholog copies, the one with the lowest adjusted p-value
unique_upregulated <-
  upregulated_gsea[!duplicated(upregulated_gsea$ortholog_ensg),] %>% arrange(ortholog_name)
unique_downregulated <-
  downregulated_gsea[!duplicated(downregulated_gsea$ortholog_ensg),] %>% arrange(ortholog_name)

# Merging down and upregulated dataframes after removing duplicates
enrichment_unique_common <-
  rbind(unique_downregulated, unique_upregulated) %>% dplyr::arrange(desc(log2FC))

# Arranging matrix for GSEA
enrichment_gsea_common <- enrichment_unique_common$log2FC

names(enrichment_gsea_common) <-
  enrichment_unique_common$ortholog_ensg
Code
# Running GSEA on common regulated genes
#| label: gsea-common-genes-4wpc
#| warning: false
#| cache: true

gsea_common <- gseGO(
  geneList = enrichment_gsea_common,
  OrgDb = org.Hs.eg.db,
  keyType = 'ENSEMBL',
  ont = 'BP',
  pvalueCutoff = 0.05,
  pAdjustMethod = 'BH',
  eps = 0,
  verbose = T
)
Code
edox <- setReadable(gsea_common, 'org.Hs.eg.db', 'ENSEMBL')

cnetplot(
  edox,
  color.params = list(foldChange = enrichment_gsea_common),
  circular = T,
  colorEdge = TRUE,
  cex_category = 1,
  cex_gene = 1,
  cex_label_category = 0.8,
  cex_label_gene = 0.8
)